def create_multi_mk_model_2(tree, chars, Qtype, pi, nregime=2):
    """
    Create an mk model with multiple regimes to be sampled from with MCMC.

    Allows multiple switches between regimes.

    Regime number is fixed and the location of the regime shift is allowed
    to change.

    Args:
        tree: Tree object. It must provide ``descendants()`` and is handed to
            ``nshifts`` and to the likelihood factory in ``discrete``.
        chars: Sequence of observed character states; the number of distinct
            values determines how many Q parameters are needed.
        Qtype: One of "ER", "Sym" or "ARD".
        pi: Currently unused; the likelihood is always built with
            ``pi="Equal"``.
        nregime: Number of regimes.

    Returns:
        The dictionary of local names (``locals()``), which contains the
        PyMC nodes defined here alongside the plain helper variables.

    Raises:
        ValueError: If ``Qtype`` is not one of "ER", "Sym", "ARD".
    """
    # Preparations
    nchar = len(set(chars))
    if Qtype == "ER":
        N = 1
    elif Qtype == "Sym":
        N = int(binom(nchar, 2))
    elif Qtype == "ARD":
        N = int((nchar ** 2 - nchar))
    else:
        # Fail right away; otherwise N is unbound and the error surfaces
        # much later with an unrelated message.
        raise ValueError("Qtype must be one of: ER, Sym, ARD")

    # This model has 2 components: Q parameters and the regime assignment of
    # each branch. They are combined in a custom likelihood function.

    ###########################################################################
    # Regime locations
    ###########################################################################
    @pymc.stochastic
    def branchRegimes(tree=tree, nregime=nregime):
        # Long-format stochastic: PyMC picks up the nested logp/random.
        def logp(value, tree, nregime):
            nswitches = nshifts(tree, value)
            # WHAT SHOULD BE THE PRIOR ON NUMBER OF SWITCHES?
            return pymc.exponential_like(nswitches, beta=1)

        def random(tree, nregime):
            # One regime index in [0, nregime) per entry of tree.descendants()
            br = np.random.choice(a=nregime, size=len(tree.descendants()))
            return br

    @pymc.deterministic(dtype=int)
    def nswitches(br=branchRegimes, tree=tree, nregime=nregime):
        # nregime is kept in the signature so the node's parents are unchanged.
        return nshifts(tree, br)

    ###########################################################################
    # Qparams:
    ###########################################################################
    # Unscaled Q param: Dirichlet distribution
    # Setting a Dirichlet prior with Jeffrey's hyperprior of 1/2
    theta = [1.0/2.0]*N

    # One set of Q-parameters per regime
    allQparams_init = np.empty(nregime, dtype=object)
    allQparams_init_full = np.empty(nregime, dtype=object)
    allScaling_factors = np.empty(nregime, dtype=object)
    for i in range(nregime):
        if N != 1:
            allQparams_init[i] = pymc.Dirichlet("allQparams_init"+str(i), theta)
            # Complete the Dirichlet that was just created for this regime.
            allQparams_init_full[i] = pymc.CompletedDirichlet(
                "allQparams_init_full"+str(i), allQparams_init[i])
        else:
            # Dirichlet function does not like creating a distribution
            # with only 1 state. Set it to 1 by hand
            allQparams_init_full[i] = [[1.0]]
        # Exponential scaling factor for Qparams
        # Represents the base rate of state change
        allScaling_factors[i] = pymc.Exponential(
            name="allScaling_factors"+str(i), beta=1.0)

    # Scaled Qparams; we would not expect them to necessarily add
    # to 1 as would be the case in a Dirichlet distribution
    @pymc.deterministic(plot=False)
    def Qparams(q=allQparams_init_full, s=allScaling_factors):
        # Rows index the Q parameter, columns index the regime.
        Qs = np.empty([N, nregime])
        for n in range(N):
            for i in range(nregime):
                Qs[n][i] = q[i][0][n]*s[i]
        return Qs

    ###########################################################################
    # Likelihood
    ###########################################################################
    # The likelihood function

    # Pre-allocating arrays
    qarray = np.zeros([N, nregime])
    locs = np.empty(nregime, dtype=object)
    # Build the likelihood for the requested number of regimes rather than a
    # hard-coded 2 (identical when nregime is left at its default).
    l = discrete.create_likelihood_function_multimk_b(tree=tree, chars=chars,
                                                      Qtype=Qtype, pi="Equal",
                                                      min=False,
                                                      nregime=nregime)

    # NOTE(review): the default branchRegimes.random() is evaluated once, when
    # this potential is defined, so `br` is whatever that single call returned
    # rather than the live branchRegimes node -- confirm this is intended.
    @pymc.potential
    def multi_mklik(q=Qparams, br=branchRegimes.random(), nregime=nregime,
                    name="multi_mklik"):
        # locs[reg] holds the 1-based positions in br assigned to regime reg.
        for reg in range(nregime):
            locs[reg] = [i+1 for i, v in enumerate(br) if v == reg]
        np.copyto(qarray, q)
        # NOTE(review): only the first row of qarray is passed on -- verify
        # against what the likelihood function expects when N > 1.
        return l(qarray[0], locs)

    return locals()
def create_multi_mk_model(tree, chars, Qtype, pi, nregime=2):
    """
    Create an mk model with multiple regimes to be sampled from with MCMC.

    Regime number is fixed and the location of the regime shift is allowed
    to change.

    Args:
        tree: Tree object. It is iterated over for nodes exposing ``ni``,
            ``isleaf`` and ``isroot``, and is indexed by node number.
        chars: Sequence of observed character states; the number of distinct
            values determines how many Q parameters are needed.
        Qtype: One of "ER", "Sym" or "ARD".
        pi: Currently unused; the likelihood is always built with
            ``pi="Equal"``.
        nregime: Number of regimes (sets of Q parameters).

    Returns:
        The dictionary of local names (``locals()``), which contains the
        PyMC nodes defined here alongside the plain helper variables.

    Raises:
        ValueError: If ``Qtype`` is not one of "ER", "Sym", "ARD".
    """
    # Preparations
    nchar = len(set(chars))
    if Qtype == "ER":
        N = 1
    elif Qtype == "Sym":
        N = int(binom(nchar, 2))
    elif Qtype == "ARD":
        N = int((nchar ** 2 - nchar))
    else:
        # Fail right away; otherwise N is unbound and the error surfaces
        # much later with an unrelated message.
        raise ValueError("Qtype must be one of: ER, Sym, ARD")

    # This model has 2 components: Q parameters and a switchpoint
    # They are combined in a custom likelihood function

    ###########################################################################
    # Switchpoint:
    ###########################################################################
    # Modeling the movement of the regime shift(s) is the tricky part
    # Regime shifts will only be allowed to happen at a node
    # Regime shift: Uniform categorical distribution
    valid_switches = [i.ni for i in tree if not (i.isleaf or i.isroot)]
    # Uniform over positions in valid_switches (both bounds inclusive)
    switch_ind = pymc.DiscreteUniform("switch_ind", lower=0,
                                      upper=len(valid_switches)-1)

    @pymc.deterministic(dtype=int)
    def switch(name="switch", switch_ind=switch_ind):
        # Translate the sampled position into the node number it stands for.
        return valid_switches[switch_ind]

    ###########################################################################
    # Qparams:
    ###########################################################################
    # Unscaled Q param: Dirichlet distribution
    # Setting a Dirichlet prior with Jeffrey's hyperprior of 1/2
    theta = [1.0/2.0]*N

    # One set of Q-parameters per regime
    allQparams_init = np.empty(nregime, dtype=object)
    allQparams_init_full = np.empty(nregime, dtype=object)
    allScaling_factors = np.empty(nregime, dtype=object)
    for i in range(nregime):
        if N != 1:
            allQparams_init[i] = pymc.Dirichlet("allQparams_init"+str(i), theta)
            allQparams_init_full[i] = pymc.CompletedDirichlet(
                "allQparams_init_full"+str(i), allQparams_init[i])
        else:
            # Dirichlet function does not like creating a distribution
            # with only 1 state. Set it to 1 by hand
            allQparams_init_full[i] = [[1.0]]
        # Exponential scaling factor for Qparams
        allScaling_factors[i] = pymc.Exponential(
            name="allScaling_factors"+str(i), beta=1.0)

    # Scaled Qparams; we would not expect them to necessarily add
    # to 1 as would be the case in a Dirichlet distribution

    # Regimes are grouped by rows. Each row is a regime.
    @pymc.deterministic(plot=False)
    def Qparams(q=allQparams_init_full, s=allScaling_factors):
        Qs = np.empty([nregime, N])
        for n in range(N):
            for i in range(nregime):
                Qs[i][n] = q[i][0][n]*s[i]
        return Qs

    ###########################################################################
    # Likelihood
    ###########################################################################
    # The likelihood function

    # Pre-allocating arrays
    qarray = np.zeros([nregime, N])
    # NOTE(review): locsarray and the likelihood below are fixed at two
    # regimes regardless of nregime -- presumably because one switchpoint
    # splits the tree in two; confirm before calling with nregime != 2.
    locsarray = np.empty([2], dtype=object)
    l = discrete.create_likelihood_function_multimk_b(tree=tree, chars=chars,
                                                      Qtype=Qtype, pi="Equal",
                                                      min=False, nregime=2)

    @pymc.potential
    def multi_mklik(q=Qparams, switch=switch, name="multi_mklik"):
        # Regime membership is derived from the node at the current switch.
        locs = discrete.locs_from_switchpoint(tree, tree[int(switch)],
                                              locsarray)
        np.copyto(qarray, q)
        return l(qarray, locs=locs)

    return locals()