def _order_qparams(qinds, qparam_list):
    """
    Arrange parameter values in the order the likelihood function expects.

    Args:
        qinds (dict): Maps a position in ``qparam_list`` to the list of
          Q-matrix indices (as produced by ``valid_indices``) filled by
          that parameter.
        qparam_list (list): Current parameter values, indexed by the keys
          of ``qinds``.
    Returns:
        list: One entry of ``qparam_list`` for every index found in
        ``qinds``, sorted by ascending index.
    """
    ordered = []
    for index in sorted(i for v in qinds.values() for i in v):
        # First group that owns this index supplies the value.
        owner = next(k for k, v in qinds.items() if index in v)
        ordered.append(qparam_list[owner])
    return ordered


def hrm_multipass_bayesian(tree, chars, Qtype, nregime, pi="Fitzjohn"):
    """
    Build the pymc nodes for a Bayesian hidden-rates Mk model.

    Args:
        tree (Node): Root node of a tree. All branch lengths must be
          greater than 0 (except root)
        chars (list): List of character states corresponding to leaf nodes
          in preorder sequence. Character states must be in the form
          of 0,1,2,...
        Qtype (str): How the Q matrix is parameterized. Must be one of:
          "Simple": One exponential rate per regime plus a single rate
                    shared by all between-regime transitions.
          "STD": State Transitions Different. Within each regime the rates
                 are a Dirichlet vector scaled by an exponential rate;
                 between-regime transitions share a single rate.
          "RTD": One exponential rate per regime and ``nregime - 1``
                 between-regime rates.
                 NOTE(review): the likelihood branch for this type is
                 unfinished (see ``mklik``).
          "ARD": Within-regime rates as in "STD"; every between-regime
                 transition gets its own exponential rate.
        nregime (int): Number of rate regimes.
        pi (str): Root weighting passed through to the likelihood
          function. Defaults to "Fitzjohn".
    Returns:
        dict: ``locals()`` of this function, i.e. every name bound while
        building the model (the parameter nodes, the likelihood function
        ``l`` and the ``mklik`` potential).
    Raises:
        AssertionError: If ``Qtype`` is not one of the four accepted names.
    """
    nobschar = len(set(chars))
    nchar = nobschar * nregime
    # assert nobschar == 2, "Can currently only handle 4x4 Q matrix"
    # assert nregime == 2, "Can currently only handle 4x4 Q matrix"
    assert Qtype in ["Simple", "STD", "RTD", "ARD"], "Q type must be one of: simple, STD, RTD, ARD"
    ###########################################################################
    # Qparams:
    ###########################################################################
    # The simple model has # of parameters equal to nregime + nregime-1 (One
    # set of rates for each regime, plus transition rates between regimes)
    # For now, we will have each be exponentially distributed

    # Within-regime parameters, "Simple" and "RTD": a single exponential
    # rate per regime.
    if Qtype in ("Simple", "RTD"):
        WR_Qparams = np.ndarray(nregime, dtype="object")
        for i in range(nregime):
            WR_Qparams[i] = pymc.Exponential(name="wr-par"+str(i), beta=1.0, value=1e-2+(i/100.0))

    # Within-regime parameters, "STD" and "ARD": transitions between states
    # within a rate regime can differ.
    if Qtype in ("STD", "ARD"):
        theta = [1.0/2.0] * nobschar
        i_d = np.ndarray(nregime, dtype="object")
        c_d = np.ndarray(nregime, dtype="object")
        scale = np.ndarray(nregime, dtype="object")
        WR_Qparams = np.ndarray(nregime, dtype="object")
        for i in range(nregime):
            # First, create a dirichlet distribution
            i_d[i] = pymc.Dirichlet("parInit_"+str(i), theta, value=[1.0/nobschar]*(nobschar-1))
            c_d[i] = pymc.CompletedDirichlet("parInit"+str(i), i_d[i])
            scale[i] = pymc.Exponential(name="scaling"+str(i), beta=1.0, value=1e-2+(i/100.0))

            # Then, scale dirichlet distribution by overall rate parameter
            # for that regime. The defaults bind this iteration's nodes.
            @pymc.deterministic(plot=False, name="wr-par"+str(i))
            def d_scaled(d=c_d[i], s=scale[i]):
                return (d*s)[0]
            WR_Qparams[i] = d_scaled

    # Between-regime parameters.
    if Qtype in ("Simple", "STD"):
        # There is one rate between regimes.
        BR_Qparams = pymc.Exponential(name="br-par", beta=1.0, value=1e-2)
    if Qtype == "RTD":
        BR_Qparams = np.ndarray(nregime-1, dtype="object")
        for i in range(nregime-1):
            BR_Qparams[i] = pymc.Exponential(name="br-par"+str(i), beta=1.0, value=1e-2)
    if Qtype == "ARD":
        BR_Qparams = np.ndarray((nregime-1)*2*nobschar, dtype="object")
        br_i = 0
        # list(...) keeps the "repeat twice" meaning on Python 3, where a
        # range object cannot be multiplied.
        for i in list(range(nregime-1))*2:
            for n in range(nobschar):
                BR_Qparams[br_i] = pymc.Exponential(name="br-par"+str(br_i), beta=1.0, value=1e-2)
                br_i += 1
    ###########################################################################
    # Likelihood
    ###########################################################################
    # The likelihood function is always built as "ARD"; mklik expands the
    # restricted parameter sets into the full parameter vector.
    l = discrete.create_likelihood_function_hrmmultipass_mk(tree=tree, chars=chars,
                                                 nregime=nregime, Qtype="ARD",
                                                 pi=pi, min=False)

    @pymc.potential
    def mklik(wr=WR_Qparams, br=BR_Qparams, name="mklik"):
        # Adjacent regime pairs (1, 0), (2, 1), ... used for regime shifts.
        rshift_pairs = [(r, r - 1) for r in range(1, nregime)]

        if Qtype == "Simple":
            # Getting the locations of each Q parameter to feed
            # to the likelihood function
            # Note that the likelihood function takes q parameters
            # in columnwise-order, not counting zero and negative values.
            # Within-regime shifts
            qinds = {}
            for regime in range(len(wr)):
                qinds[regime] = valid_indices(nobschar, nregime, regime, regime)
            # Between-regime shifts (all share 1 rate)
            shared = len(wr)
            qinds[shared] = []
            for hi, lo in rshift_pairs:
                qinds[shared].extend(valid_indices(nobschar, nregime, hi, lo))
                qinds[shared].extend(valid_indices(nobschar, nregime, lo, hi))
            # Qparams contains the parameters needed in the
            # correct order for the likelihood function.
            Qparams = _order_qparams(qinds, list(wr) + [br])
            # Regime rates must be ascending and the between-regime rate
            # must be below the rate of the last regime.
            within = list(wr)
            if sorted(within) == within and br < wr[nregime-1]:
                return l(np.array(Qparams))
            return -np.inf

        if Qtype == "STD":
            # NOTE(review): the inner range(nregime) and the ordering check
            # below index within-regime rates by regime count; this looks
            # like it assumes nobschar == nregime == 2 -- confirm.
            qinds = {}
            slot = 0
            for regime in range(len(wr)):
                within = valid_indices(nobschar, nregime, regime, regime)
                for k in range(nregime):
                    qinds[slot] = [within[k]]
                    slot += 1
            # Between-regime shifts (all share 1 rate)
            qinds[slot] = []
            for hi, lo in rshift_pairs:
                qinds[slot].extend(valid_indices(nobschar, nregime, hi, lo))
                qinds[slot].extend(valid_indices(nobschar, nregime, lo, hi))
            flat_wr = [rate for regime_rates in wr for rate in regime_rates]
            Qparams = _order_qparams(qinds, flat_wr + [br])
            # Conditions to be enforced:
            # All corresponding rates in each regime must be ordered (fast
            # must be faster than medium, etc)
            # Rate for shift between regimes cannot be faster than the
            # fastest rate within regimes
            for k in range(nregime):
                across = [q[k] for q in wr]
                if not sorted(across) == across:
                    return -np.inf
            if br > max(wr[nregime-1]):
                return -np.inf
            return l(np.array(Qparams))

        if Qtype == "RTD":
            # NOTE(review): unfinished -- only the within-regime indices
            # are collected and nothing is returned, so this potential
            # evaluates to None for "RTD".
            qinds = {}
            for regime in range(len(wr)):
                qinds[regime] = valid_indices(nobschar, nregime, regime, regime)

        if Qtype == "ARD":
            # NOTE(review): same apparent nobschar == nregime == 2
            # assumption as the "STD" branch -- confirm.
            qinds = {}
            slot = 0
            for regime in range(len(wr)):
                within = valid_indices(nobschar, nregime, regime, regime)
                for k in range(nregime):
                    qinds[slot] = [within[k]]
                    slot += 1
            # Every between-regime transition has its own parameter.
            for hi, lo in rshift_pairs:
                for index in valid_indices(nobschar, nregime, hi, lo):
                    qinds[slot] = [index]
                    slot += 1
                for index in valid_indices(nobschar, nregime, lo, hi):
                    qinds[slot] = [index]
                    slot += 1
            flat_wr = [rate for regime_rates in wr for rate in regime_rates]
            Qparams = _order_qparams(qinds, flat_wr + [b for b in br])
            # Conditions to be enforced:
            # All corresponding rates in each regime must be ordered (fast
            # must be faster than medium, etc)
            # Max rate for shift between regimes cannot be faster than the
            # fastest rate within regimes
            for k in range(nregime):
                across = [q[k] for q in wr]
                if not sorted(across) == across:
                    return -np.inf
            if max(br) > max(wr[nregime-1]):
                return -np.inf
            return l(np.array(Qparams))
    return locals()
# Hand-set inputs used when stepping through the likelihood code manually.
Q = ivy.chars.discrete.fill_Q_matrix(
    2, 2,
    [0.0, 1.0, 0.00, 1.0, 1.0, 5.5, 1.0, 5.5],
)
nregime = 2
p = preallocated_arrays = tip_states = None
pi = "Fitzjohn"
returnPi = False
Qtype = "ARD"
min = True  # NOTE: rebinds the builtin name ``min`` at module level
Qparams = np.array([0.0, 1.0, 0.0, 1.0, 1.0, 5.5, 1.0, 5.5])
# Multipass likelihood function for the current ``tree`` and ``chars``.
f = discrete.create_likelihood_function_hrmmultipass_mk(tree, chars, 2, "ARD")
Qparams = np.array([0.0, 1.0, 0.0, 1.0, 1.0, 5.5, 1.0, 5.5])
#
# tree.ape_node_idx()
#
# for t in tree:
#     t.label = t.apeidx
#
#
# treeni = 38
# Tip states for a 20-leaf example, followed by the same hand-set inputs
# used when stepping through the likelihood code manually.
chars = [
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    0, 0, 0, 1, 1, 1, 0, 0, 1, 1,
]
Q = ivy.chars.discrete.fill_Q_matrix(
    2, 2,
    [0.0, 1.0, 0.00, 1.0, 1.0, 5.5, 1.0, 5.5],
)
nregime = 2
p = preallocated_arrays = tip_states = None
pi = "Fitzjohn"
returnPi = False
Qtype = "ARD"
min = True  # NOTE: rebinds the builtin name ``min`` at module level
Qparams = np.array([0.0, 1.0, 0.0, 1.0, 1.0, 5.5, 1.0, 5.5])
# Multipass likelihood function for the current ``tree`` and ``chars``.
f = discrete.create_likelihood_function_hrmmultipass_mk(tree, chars, 2, "ARD")
Qparams = np.array([0.0, 1.0, 0.0, 1.0, 1.0, 5.5, 1.0, 5.5])
#
# tree.ape_node_idx()
#
# for t in tree:
#     t.label = t.apeidx
#
#
# treeni = 38
#
#
# node1ni = 18
# node1n = preallocated_arrays["nodelist-up"][18]
# Profile the likelihood function built by create_likelihood_function_hrm_mk.
l_single = discrete.create_likelihood_function_hrm_mk(
    tree=tree,
    chars=chars,
    nregime=nregime,
    Qtype="ARD",
    pi=pi,
    min=False,
)


def loop1():
    """Evaluate ``l_single`` five times with all eight rates set to 0.1."""
    for _ in range(5):
        l_single(np.array([0.1] * 8))


cProfile.run("loop1()")

# Profile the multipass likelihood function on the module-level ``Qparams``.
l_multipass = discrete.create_likelihood_function_hrmmultipass_mk(
    tree=tree,
    chars=chars,
    nregime=nregime,
    Qtype="ARD",
    pi=pi,
    min=False,
)


def loop2():
    """Evaluate ``l_multipass`` five times on ``Qparams``."""
    for _ in range(5):
        l_multipass(Qparams)


cProfile.run("loop2()")

# Work on a private copy so the calls below do not touch ``var1``.
var = copy.deepcopy(var1)


def loop3():
    """Call ``discrete.hrm_back_mk`` 55 times, reusing ``var`` each time."""
    for _ in range(55):
        discrete.hrm_back_mk(tree, chars, Q, nregime, preallocated_arrays=var)