Beispiel #1
0
def NNI(t):
	"""
	Perform one random nearest-neighbor interchange (NNI) on tree <t>, in place.

	A random internal node is drawn as <parent>, then two of its children are
	drawn as <target> and <sibling>. The draw is repeated until <target> is
	not a leaf; a random child of <target> is then exchanged with <sibling>.

	The lengths handed to add_child() are chosen so that each moved node keeps
	its previous path length to <parent>:
	  child   (now under parent): child.edge_length + target.edge_length
	  sibling (now under target): sibling.edge_length - target.edge_length

	A one-line summary of the move is written to stderr.

	Returns: t, (new) order

	Raises: ValueError if no internal node of <t> has at least two children
	        of which at least one is internal. Without this check the redraw
	        loop below could never terminate on such a tree.
	"""
	def can_host_swap(node):
		# a usable parent needs a (target, sibling) pair with an internal target
		kids = node.child_nodes()
		return len(kids) >= 2 and any(not kid.is_leaf() for kid in kids)

	if not any(can_host_swap(node) for node in t.internal_nodes()):
		raise ValueError("NNI: tree has no internal node with an internal child")

	while True:
		parent = random.choice(t.internal_nodes())
		kids = parent.child_nodes()
		if len(kids) < 2:
			# cannot draw two distinct children here; redraw the parent
			continue
		# choose one of the kids as target and another as sibling
		target, sibling = random.sample(kids, 2)
		if not target.is_leaf():
			# select one child of target to swap w/ sibling
			child = random.choice(target.child_nodes())
			break

	sys.stderr.write("NNI: parent {0}, target {1}, sibling {2}, child {3}\n".format(
		parent.label, target.label, sibling.label, child.label))

	# swap child & sibling in tree
	new_child_branch = child.edge_length + target.edge_length
	# NOTE(review): this goes negative whenever sibling's branch is shorter
	# than target's -- confirm that downstream code tolerates that.
	new_sibling_branch = sibling.edge_length - target.edge_length
	parent.remove_child(sibling)
	target.remove_child(child)
	parent.add_child(child, new_child_branch)
	target.add_child(sibling, new_sibling_branch)

	# obtain new order via postorder traversal (should be fast enough)
	order = Tree.postorder_assign_then_traverse(t, None, do_assign=False)

	return t, order
Beispiel #2
0
def optimize_branch_fast(tlobj, tprime, children_index_list):
	"""
	Quick optimization of a subset of branches (indicated by <children_index_list>)
	in tprime while using tlobj for parameters.

	tlobj --- supplies the models, log frequencies, matrix dimensions and the
	         S / P matrices (which are copied, not modified)
	tprime --- tree whose branch lengths are refined in place
	children_index_list --- list of (i, j) indicating that we want to iteratively
	         refine the branch of node label i --- node label j

	Most likely will be the 3 local branches at the new insertion point of a subtree.
	Each branch is optimized on its own with L-BFGS-B inside the bounds
	(1e-3, 10). The sweep over all listed branches is repeated until no branch
	moved by more than 0.1 in a sweep.

	Returns: (fx, tprime), where fx is the objective value from the last
	         branch optimization, i.e. -(L_single.sum() + L_paired.sum()).
	         If <children_index_list> is empty, fx is that same quantity
	         evaluated once on tprime as given.
	         Returns (None, None) as soon as the optimizer reports a non-zero
	         warnflag; branch lengths changed up to that point are not restored.

	NOTE: this only changes branch lengths in tprime. tlobj not affected!!
	NOTE: reversible_subtree_func currently cheats by only recalc-ing the
	      entries of parent and up, to ensure this works, I think making sure
	      copy_{S|P} are correct is important and therefore the two g() calls.
	"""
	g = MyMat.calc_likelihood
	meat = scipy.optimize.fmin_l_bfgs_b

	def reversible_subtree_func(copy_S, copy_P, parent, child, t_a):
		# Objective for the optimizer: set the trial length, then re-evaluate
		# only along the cheat traversal that starts at <parent>.
		child.edge_length = t_a[0]
		order = Tree.postorder_cheat_traverse(parent)

		L_single = g(tlobj.single_model.gtr.R, copy_S, tlobj.log_freq_single,
				order, range(tlobj.ncol), tlobj.nnode, tlobj.ncol, tlobj.nbase)
		L_paired = g(tlobj.paired_model.gtr.R, copy_P, tlobj.log_freq_paired,
				order, range(tlobj.ncol_p), tlobj.nnode_p, tlobj.ncol_p, tlobj.nbase_p)

		return -(L_single.sum() + L_paired.sum())

	# TODO: make this more efficient!
	copy_S = tlobj.S.copy()
	copy_P = tlobj.P.copy()
	order = Tree.postorder_assign_then_traverse(tprime, None, False)
	# Full pass so copy_S / copy_P match tprime before any cheat traversal.
	L_single = g(tlobj.single_model.gtr.R, copy_S, tlobj.log_freq_single,
		order, range(tlobj.ncol), tlobj.nnode, tlobj.ncol, tlobj.nbase)
	L_paired = g(tlobj.paired_model.gtr.R, copy_P, tlobj.log_freq_paired,
		order, range(tlobj.ncol_p), tlobj.nnode_p, tlobj.ncol_p, tlobj.nbase_p)
	# Seed fx so that an empty children_index_list still has a value to return.
	fx = -(L_single.sum() + L_paired.sum())

	changed = True
	while changed:
		changed = False
		for i, j in children_index_list:
			parent = tprime.find_node_with_label(i)
			child = tprime.find_node_with_label(j)
			old_t_a = child.edge_length

			def func(x):
				return reversible_subtree_func(copy_S, copy_P, parent, child, x)

			x, fx, d = meat(func, [old_t_a], approx_grad=True,
					bounds=[(1e-3, 10)], pgtol=1e-2)
			if d['warnflag'] != 0:
				return None, None # handle this appropriately!
			# The objective leaves child.edge_length at whatever point it was
			# last called with, which need not be the returned optimum (the
			# gradient is approximated by extra evaluations); pin it to x.
			child.edge_length = x[0]
			if abs(x[0] - old_t_a) > 0.1:
				changed = True
	return fx, tprime
Beispiel #3
0
	def __init__(self, msa, tree, single_model, paired_model, treat_gap_as_missing):
		"""
		Input:

		msa  --- MSA object (the alignment)
		tree --- initially is the starting tree (dendropy.Tree)
		single/paired model -- EvoModel.{single|paired}model objects
		treat_gap_as_missing --- stored as given; not otherwise used here

		Also has the following attributes:
		log_freq_single / log_freq_paired --- log of each model's Frequency
		single_cols --- list of unpaired positions
		paired_cols --- sorted list of paired positions (i, j)
		order --- postorder traversal (often changes! beware!)
		like --- positive log likelihood (often changes! beware!)
		S, P --- likelihood matrices for single / paired positions,
		         left as None here
		nnode, ncol, nbase for single/paired parameters...

		NOTE: ENFORCES tree TO BE BINARY
		"""
		self.msa = msa
		self.tree = tree
		self.single_model = single_model
		self.paired_model = paired_model
		self.treat_gap_as_missing = treat_gap_as_missing
		self.log_freq_single = log(self.single_model.Frequency)
		self.log_freq_paired = log(self.paired_model.Frequency)

		self.single_cols = msa.single_cols()
		# sorted() gives the same list as items() + in-place sort(), and
		# also works when items() returns a view instead of a list.
		self.paired_cols = sorted(msa.BP.items())

		self.nnode = 2*msa.nseq + 1
		self.ncol = len(self.single_cols)
		self.nbase = 5
		self.nnode_p = self.nnode
		self.ncol_p = len(self.paired_cols)
		self.nbase_p = 25

		Tree.make_tree_binary(self.tree) # must precede self.order!

		self.order = Tree.postorder_assign_then_traverse(tree, list(msa.ids))
		self.like = None # should be the positive log likelihood

		self.S = None # likelihood matrix for single positions
		self.P = None # likelihood matrix for paired positions
Beispiel #4
0
	def update_order(self):
		"""
		Refresh self.order from the current state of self.tree.

		Calls Tree.postorder_assign_then_traverse with None and False as
		the second and third arguments and stores its result.
		"""
		refreshed = Tree.postorder_assign_then_traverse(self.tree, None, False)
		self.order = refreshed