# NOTE: module-level imports assumed by the functions in this file. The
# project-internal modules (st, vmf, spa, mcmc, model, model_bin,
# model_cluster, model_bin_cluster) are assumed to be imported elsewhere
# in the package.
import numpy as np
from numpy.linalg import norm
from scipy.special import logsumexp
from sklearn.cluster import KMeans
from tqdm import trange


def map_to_ground_truth(Xs_mh, ls_mh, theta, theta0):
    """
    Permute and sign-flip the columns of the estimated parameters and latent
    variables so that they best match the ground-truth parameter theta0.
    """
    F, mu, sigma, sigma_l = theta
    F0, mu0, sigma0, sigma_l0 = theta0
    m, s = st.greedy_permutation(st.proj_V(F0), st.proj_V(F))
    F = s * F[:, m]
    mu = mu[m]
    Xs_mh = s * Xs_mh[:, :, m]
    ls_mh = ls_mh[:, m]
    return Xs_mh, ls_mh, (F, mu, sigma, sigma_l), m, s
def init_saem_grad(As, p, n_iter=10, step=0.1, setting="gaussian"):
    """
    Refine the init_saem initialization with projected gradient ascent on X
    and Metropolis-Hastings moves on lambda.
    """
    # Rebind the model module for the binary setting (global side effect)
    global model
    if setting == "binary":
        model = model_bin
    n_samples, n, _ = As.shape
    theta, _, _ = init_saem(As, p)
    F, mu, sigma, sigma_l = theta
    # Start from unit noise levels
    sigma = 1
    sigma_l = 1
    mode = st.proj_V(F)
    Xs = np.array([mode.copy() for _ in range(n_samples)])
    ls = mu[None, :] + sigma_l * np.random.randn(n_samples, p)
    lks = []
    it = trange(n_iter)
    prop_l = 1
    current_log_lk = np.array([
        model.log_lk_partial(Xs[i], ls[i], As[i], theta)
        for i in range(n_samples)
    ])
    for t in it:
        mode = st.proj_V(F)
        for _ in range(10):
            for i in range(n_samples):
                if t % 5 == 0:
                    # Periodically re-align the columns of X with the mode
                    m, s = st.greedy_permutation(mode, Xs[i])
                    Xs[i] = s * Xs[i][:, m]
                    ls[i] = ls[i][m]
                # [X] Normalized gradient step, projected onto the manifold
                grad_X = model.log_lk_partial_grad_X(Xs[i], ls[i], As[i], theta)
                grad_X = grad_X / norm(grad_X)
                Xs[i] = st.proj_V(Xs[i] + step * grad_X)
                # [l] Generate next move
                l2 = ls[i] + prop_l * np.random.randn(p)
                # [l] Compute the acceptance log-probability
                new_log_lk = model.log_lk_partial(Xs[i], l2, As[i], theta)
                log_alpha = new_log_lk - current_log_lk[i]
                # [l] Accept or reject
                if np.log(np.random.rand()) < log_alpha:
                    ls[i] = l2
                    current_log_lk[i] = new_log_lk
        F = vmf.mle(Xs.mean(axis=0))
        mu = ls.mean(axis=0)
        # Standard deviations (not variances), consistent with init_saem
        sigma = np.sqrt(((As - st.comp_numba_many(Xs, ls))**2).mean())
        sigma_l = np.sqrt(((ls - mu)**2).mean())
        theta = (F, mu, sigma, sigma_l)
        lks.append(model.log_lk(Xs, ls, As, theta, normalized=True))
        it.set_postfix({"lk": lks[-1]})
    return theta, Xs, ls, lks
def map_to_ground_truth_cluster(Xs_mh, ls_mh, zs_mh, theta, theta0, M=None):
    """
    Cluster-wise version of map_to_ground_truth: match each estimated cluster
    to a ground-truth cluster, then align the parameters and latent variables
    within each matched pair. If M is given, it prescribes the matching.
    """
    F, mu, sigma, sigma_l, pi = theta
    F0, mu0, sigma0, sigma_l0, pi0 = theta0
    n_samples, n, p = Xs_mh.shape
    K = len(pi)
    ms = np.zeros((K, K, p), dtype=np.int32)
    ss = np.zeros((K, K, p))
    # E[k, l] measures the discrepancy between ground-truth cluster k and
    # estimated cluster l after optimal column permutation
    E = np.zeros((K, K))
    for k in range(K):
        for l in range(K):
            X = st.proj_V(F0[k])
            Y = st.proj_V(F[l])
            ms[k, l], ss[k, l] = st.greedy_permutation(X, Y)
            E[k, l] += st.discr(F0[k], ss[k, l] * F[l][:, ms[k, l]])
            E[k, l] += np.linalg.norm(mu0[k] - mu[l][ms[k, l]])
    if M is None:
        # Greedy assignment of estimated clusters to ground-truth clusters
        M = np.zeros(K, dtype=np.int32)
        iM = np.zeros(K, dtype=np.int32)
        for _ in range(K):
            k, l = np.unravel_index(E.argmin(), E.shape)
            # Exclude the matched row and column from further assignments
            # (np.inf rather than 0, which argmin would pick up again)
            E[k, :] = np.inf
            E[:, l] = np.inf
            M[k] = l
            iM[l] = k
    else:
        iM = np.zeros(K, dtype=np.int32)
        for k, l in enumerate(M):
            iM[l] = k
    pi = pi[M]
    ms = [ms[k, M[k]] for k in range(K)]
    ss = [ss[k, M[k]] for k in range(K)]
    F_ = F.copy()
    mu_ = mu.copy()
    for k in range(K):
        m = ms[k]
        s = ss[k]
        F[k] = s * F_[M[k]][:, m]
        mu[k] = mu_[M[k]][m]
    zs_mh = np.array([iM[zs_mh[i]] for i in range(n_samples)])
    for i in range(n_samples):
        m = ms[zs_mh[i]]
        s = ss[zs_mh[i]]
        Xs_mh[i] = s * Xs_mh[i][:, m]
        ls_mh[i] = ls_mh[i][m]
    return Xs_mh, ls_mh, zs_mh, (F, mu, sigma, sigma_l, pi), ms, ss
def sample_von_mises_fisher(F, n_iter=100, burn=100, stride=10, progress=False):
    """
    Sample from the vMF distribution using an adaptive Metropolis-Hastings
    algorithm.
    """
    # Initialize at the mode of the distribution to speed up convergence
    X = st.proj_V(F)
    s = np.linalg.svd(F, compute_uv=False)
    n, p = F.shape
    Xs = np.zeros((n_iter, n, p))
    current_lk = (X * F).sum()
    accepts = 0
    # Adaptive parameters: the proposal variance is tuned along the MCMC
    std = 0.4
    batch = 100
    accepts_hist = np.zeros(batch)
    optimal_rate = 0.234
    total_steps = burn + n_iter * stride
    it = trange(total_steps) if progress else range(total_steps)
    for t in it:
        # The proposal adds non-manifold noise, scaled by the singular values
        # of F, and projects back onto the manifold
        D = std * np.random.randn(n, p) / s
        X2 = st.proj_V(X + D)
        new_lk = (X2 * F).sum()
        if new_lk - current_lk > np.log(np.random.rand()):
            X = X2
            current_lk = new_lk
            accepts += 1
            accepts_hist[t % batch] = 1
        else:
            accepts_hist[t % batch] = 0
        if t >= burn and (t - burn) % stride == 0:
            Xs[(t - burn) // stride] = X
        if t % batch == 0 and t > 0:
            # Nudge the proposal variance toward the optimal acceptance rate
            adapt = 2 * (accepts_hist.mean() > optimal_rate) - 1
            std = np.exp(np.log(std) + adapt / np.sqrt(t))
    if progress:
        print(f"VMF Acceptance rate: {accepts / total_steps}")
    return Xs
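# Usage sketch for the vMF sampler above (the demo name and numeric values
# are arbitrary). A parameter matrix with orthonormal columns scaled by
# `concentration` has its mode at those columns, and every returned sample
# lies on the Stiefel manifold.
def _demo_sample_vmf(n=10, p=3, concentration=20.0):
    F = concentration * np.linalg.qr(np.random.randn(n, p))[0]
    Xs = sample_von_mises_fisher(F, n_iter=200, burn=500, stride=5)
    # Sanity check: X.T @ X should be close to the identity
    print(np.abs(Xs[-1].T @ Xs[-1] - np.eye(p)).max())
    return Xs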
def map_mask(A, mask, theta, n_iter):
    """
    Given a set of observed coefficients of A, finds the MAP estimator of the
    remaining hidden coefficients and of the latent variables (X, l).
    mask is the set of unknown coefficients, given as two arrays of x and y
    indices. The function returns the optimized A, X and l, together with the
    log-likelihood values along the iterations.
    """
    F, mu, sigma, sigma_l = theta
    mx, my = mask
    n, p = F.shape
    A = A.copy()
    X = st.proj_V(F)
    l = mu.copy()
    # Posterior standard deviation for lambda
    posterior_std_l = np.sqrt(1 / (1 / sigma**2 + 1 / sigma_l**2))
    lks = np.zeros(n_iter)
    for t in range(n_iter):
        # Decreasing gradient step size
        step = 1 / (2 * t + 1)
        # [A] Explicit maximum on A: fill the hidden coefficients with the
        # current reconstruction
        comp = st.comp_numba_single(X, l)
        for i in range(len(mx)):
            A[mx[i], my[i]] = comp[mx[i], my[i]]
            A[my[i], mx[i]] = comp[mx[i], my[i]]
        # [X] Normalized projected gradient step on X
        grad_X = model.log_lk_partial_grad_X(X, l, A, theta)
        grad_X = grad_X / norm(grad_X)
        X = st.proj_V(X + step * grad_X)
        # [l] Explicit maximum on lambda
        v = np.diag(X.T @ A @ X)
        l = (posterior_std_l**2) * (v / sigma**2 + mu / sigma_l**2)
        lks[t] = model.log_lk_partial(X, l, A, theta)
    return A, X, l, lks
def init_saem(As, p):
    n_samples, n, _ = As.shape
    ls = np.zeros((n_samples, p))
    Xs = np.zeros((n_samples, n, p))
    # Compute the eigendecomposition of each (symmetric) adjacency matrix
    # and keep the p eigenvectors with largest absolute eigenvalue
    for i in range(n_samples):
        ev, u = np.linalg.eigh(As[i])
        idx = (-np.abs(ev)).argsort()[:p]
        ls[i] = ev[idx]
        Xs[i] = u[:, idx]
    # Average the eigenvectors on the Stiefel manifold
    mode = st.proj_V(Xs.mean(axis=0))
    # Permute the eigenvectors to align them with the computed mode
    for i in range(n_samples):
        m, s = st.greedy_permutation(mode, Xs[i])
        Xs[i] = s * Xs[i][:, m]
        ls[i] = ls[i][m]
    # Initialize the parameters from the resulting eigenvectors and eigenvalues
    F = vmf.mle(Xs.mean(axis=0))
    mu = ls.mean(axis=0)
    sigma = (As - st.comp_numba_many(Xs, ls)).std()
    sigma_l = (ls - mu).std()
    return (F, mu, sigma, sigma_l), Xs, ls
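# Synthetic-data sketch: generate networks from the model,
# A_i = (reconstruction of X_i, lambda_i, presumably X_i diag(lambda_i) X_i^T
# via st.comp_numba_many) plus symmetric Gaussian noise, then initialize the
# parameters with init_saem. All numeric values are arbitrary; mu is
# hard-coded for p = 3.
def _demo_init_saem(n=20, n_samples=50, sigma=0.05):
    p = 3
    F = 50.0 * np.linalg.qr(np.random.randn(n, p))[0]
    mu = np.array([5.0, 3.0, -2.0])
    Xs = sample_von_mises_fisher(F, n_iter=n_samples, burn=500, stride=10)
    ls = mu + 0.1 * np.random.randn(n_samples, p)
    noise = sigma * np.random.randn(n_samples, n, n)
    As = st.comp_numba_many(Xs, ls) + (noise + noise.transpose(0, 2, 1)) / 2
    theta0, Xs0, ls0 = init_saem(As, p)
    return theta0, As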
def mcmc_saem(As, Xs_mh, ls_mh, theta, n_iter=100, mala=False, prop_X=0.01,
              prop_l=0.5, n_mcmc=20, history=True, setting="gaussian"):
    """
    Run the MCMC-SAEM algorithm for the base model.
    - n_mcmc is the number of MCMC steps per SAEM step
    - mala switches the simulation step from Metropolis-Hastings to MALA
    Returns a dictionary with the final parameters and latent variables, and
    the history of the run.
    """
    F, mu, sigma, sigma_l = theta
    optimal_rate = 0.234
    batch = 5  # SAEM steps between two column re-alignment steps
    n_samples, n, p = Xs_mh.shape
    # Initialize the exhaustive statistics
    Xs_comp = st.comp(Xs_mh, ls_mh)
    X_bar = Xs_mh.mean(axis=0)
    l_bar = ls_mh.mean(axis=0)
    l2_bar = (ls_mh**2).mean(axis=0).sum()
    s2_bar = ((As - Xs_comp)**2).mean()
    # Initialize the latent variables
    Xs_mhs = [Xs_mh]
    ls_mhs = [ls_mh]
    Xs_mh = Xs_mh.copy()
    ls_mh = ls_mh.copy()
    lks = []
    # Initialize the parameter history
    Fs = [F]
    mus = [mu]
    sigmas = [sigma]
    sigma_ls = [sigma_l]
    for t in trange(n_iter):
        # MCMC step: use Metropolis-Hastings or MALA
        if mala:
            Xs_mh, ls_mh, lk, rate_X, rate_l = mcmc.mala(
                As, theta, n_iter=n_mcmc, init=(Xs_mh, ls_mh), progress=False,
                prop_X=prop_X, prop_l=prop_l, setting=setting)
        else:
            Xs_mh, ls_mh, lk, rate_X, rate_l = mcmc.mh(
                As, theta, n_iter=n_mcmc, init=(Xs_mh, ls_mh),
                prop_X=prop_X, prop_l=prop_l, setting=setting)
        if t % batch == 0 and t < n_iter / 3:
            # Re-align the columns of each X with the current mode of F
            mode = st.proj_V(F)
            for i in range(n_samples):
                perm, sign = st.greedy_permutation(mode, Xs_mh[i])
                Xs_mh[i] = sign * Xs_mh[i][:, perm]
                ls_mh[i] = ls_mh[i][perm]
        # Update proposal variance for adaptive MCMC
        adaptive_X = 2 * (rate_X > optimal_rate) - 1
        prop_X = np.exp(np.log(prop_X) + 0.5 * adaptive_X / (1 + t)**0.6)
        adaptive_l = 2 * (rate_l > optimal_rate) - 1
        prop_l = np.exp(np.log(prop_l) + 0.5 * adaptive_l / (1 + t)**0.6)
        # Maximization step
        # Update the stochastic approximation coefficient
        if t < n_iter / 2:
            alpha = 1
        else:
            alpha = 1 / (t - n_iter / 2 + 1)**0.6
        # Update the exhaustive statistics
        Xs_comp = st.comp(Xs_mh, ls_mh)
        X_bar = (1 - alpha) * X_bar + alpha * Xs_mh.mean(axis=0)
        l_bar = (1 - alpha) * l_bar + alpha * ls_mh.mean(axis=0)
        l2_bar = (1 - alpha) * l2_bar + alpha * (ls_mh**2).mean(axis=0).sum()
        s2_bar = (1 - alpha) * s2_bar + alpha * ((As - Xs_comp)**2).mean()
        # Update sigma
        sigma = np.sqrt(s2_bar)
        # Update F
        F = vmf.mle(X_bar, orth=True)
        # Update mu
        mu = l_bar
        # Update sigma_l
        sigma_l = np.sqrt((norm(mu)**2 + l2_bar - 2 * (mu * l_bar).sum()) / p)
        theta = (F, mu, sigma, sigma_l)
        # Store the current complete log-likelihood
        if setting == "gaussian":
            lks.append(model.log_lk(Xs_mh, ls_mh, As, theta, normalized=True))
        elif setting == "binary":
            lks.append(model_bin.log_lk(Xs_mh, ls_mh, As, theta, normalized=True))
        Fs.append(F)
        mus.append(mu)
        sigmas.append(sigma)
        sigma_ls.append(sigma_l)
        # If history is True, store the values of Xs and ls along the chain
        if history:
            Xs_mhs.append(Xs_mh.copy())
            ls_mhs.append(ls_mh.copy())
    result = {
        "theta": theta,
        "Xs_mh": Xs_mh,
        "ls_mh": ls_mh,
        "history": {
            "lks": lks,
            "F": Fs,
            "mu": mus,
            "sigma": sigmas,
            "sigma_l": sigma_ls,
            "Xs_mh": Xs_mhs,
            "ls_mh": ls_mhs,
        },
    }
    return result
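# End-to-end usage sketch (assumed scope: the init and SAEM functions above
# are importable together; hyperparameter values are arbitrary).
def _demo_mcmc_saem(As, p=3):
    theta0, Xs0, ls0 = init_saem(As, p)
    # Set mala=True to use the Langevin sampler (mala, defined below) instead
    # of the random-walk sampler (mh, defined below)
    result = mcmc_saem(As, Xs0, ls0, theta0, n_iter=200, mala=False)
    F, mu, sigma, sigma_l = result["theta"]
    lks = result["history"]["lks"]  # complete log-likelihood trace
    return result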
def init_saem_grad_cluster(As, p, K, n_iter=10, step=0.1, setting="gaussian"):
    """
    Cluster-wise initialization: k-means on the flattened adjacency matrices,
    then init_saem within each cluster, followed by gradient refinement.
    """
    n_samples, n, _ = As.shape
    kmeans = KMeans(n_clusters=K).fit(As.reshape(n_samples, -1))
    zs = kmeans.labels_
    F = np.zeros((K, n, p))
    mu = np.zeros((K, p))
    sigma = np.zeros(K)
    sigma_l = np.zeros(K)
    pi = np.bincount(zs) / n_samples
    # Initialize each cluster independently
    for k in range(K):
        idx = np.where(zs == k)[0]
        (F[k], mu[k], sigma[k], sigma_l[k]), _, _ = init_saem(As[idx], p)
    mode = [st.proj_V(F[k]) for k in range(K)]
    Xs = np.array([mode[zs[i]].copy() for i in range(n_samples)])
    ls = mu[zs]
    lks = []
    prop_l = 1
    it = trange(n_iter)
    if setting == "gaussian":
        current_log_lk = np.array([
            model.log_lk_partial(
                Xs[i], ls[i], As[i],
                (F[zs[i]], mu[zs[i]], sigma[zs[i]], sigma_l[zs[i]]))
            for i in range(n_samples)
        ])
    elif setting == "binary":
        current_log_lk = np.array([
            model_bin.log_lk_partial(
                Xs[i], ls[i], As[i],
                (F[zs[i]], mu[zs[i]], sigma[zs[i]], sigma_l[zs[i]]))
            for i in range(n_samples)
        ])
    for t in it:
        mode = [st.proj_V(F[k]) for k in range(K)]
        for _ in range(10):
            for i in range(n_samples):
                k = zs[i]
                theta = (F[k], mu[k], sigma[k], sigma_l[k])
                if t % 5 == 0:
                    # Re-align the columns of X with the mode of its cluster
                    m, s = st.greedy_permutation(mode[k], Xs[i])
                    Xs[i] = s * Xs[i][:, m]
                    ls[i] = ls[i][m]
                # [X] Normalized projected gradient step
                if setting == "gaussian":
                    grad_X = model.log_lk_partial_grad_X(
                        Xs[i], ls[i], As[i], theta)
                elif setting == "binary":
                    grad_X = model_bin.log_lk_partial_grad_X(
                        Xs[i], ls[i], As[i], theta)
                grad_X = grad_X / norm(grad_X)
                Xs[i] = st.proj_V(Xs[i] + step * grad_X)
                # [l] Generate next move
                l2 = ls[i] + prop_l * np.random.randn(p)
                # [l] Compute the acceptance log-probability
                if setting == "gaussian":
                    new_log_lk = model.log_lk_partial(Xs[i], l2, As[i], theta)
                elif setting == "binary":
                    new_log_lk = model_bin.log_lk_partial(
                        Xs[i], l2, As[i], theta)
                log_alpha = new_log_lk - current_log_lk[i]
                # [l] Accept or reject
                if np.log(np.random.rand()) < log_alpha:
                    ls[i] = l2
                    current_log_lk[i] = new_log_lk
        # Update the parameters of each cluster
        for k in range(K):
            idx = np.where(zs == k)[0]
            F[k] = vmf.mle(Xs[idx].mean(axis=0))
            mu[k] = ls[idx].mean(axis=0)
            # Standard deviations (not variances), consistent with init_saem
            sigma[k] = np.sqrt(
                ((As[idx] - st.comp_numba_many(Xs[idx], ls[idx]))**2).mean())
            sigma_l[k] = np.sqrt(((ls[idx] - mu[k])**2).mean())
        theta_full = (F, mu, sigma, sigma_l, pi)
        if setting == "gaussian":
            lks.append(model_cluster.log_lk(Xs, ls, zs, As, theta_full,
                                            normalized=True))
        elif setting == "binary":
            lks.append(model_bin_cluster.log_lk(Xs, ls, zs, As, theta_full,
                                                normalized=True))
        it.set_postfix({"lk": lks[-1]})
    return (F, mu, sigma, sigma_l, pi), Xs, ls, zs, lks
def mcmc_saem_cluster(As, Xs_mh, ls_mh, zs_mh, theta, n_iter=100, prop_X=0.01,
                      prop_l=0.5, n_mcmc=20, history=True, setting="gaussian",
                      T=0):
    """
    Run the MCMC-SAEM algorithm for the mixture model.
    - T controls the initial tempering of the cluster labels; the temperature
      decreases toward 1 along the iterations
    Returns a dictionary with the final parameters and latent variables, and
    the history of the run.
    """
    F, mu, sigma, sigma_l, pi = theta
    optimal_rate = 0.234
    batch = 5  # SAEM steps between two column re-alignment steps
    n_samples, n, p = Xs_mh.shape
    K = len(pi)
    # Initialize the exhaustive statistics for each cluster
    Xs_comp = st.comp(Xs_mh, ls_mh)
    X_bar = np.zeros((K, n, p))
    l_bar = np.zeros((K, p))
    l2_bar = np.zeros(K)
    s2_bar = np.zeros(K)
    for k in range(K):
        idx = np.where(zs_mh == k)[0]
        # Check that the cluster is not empty
        if len(idx) > 0:
            X_bar[k] = Xs_mh[idx].mean(axis=0)
            l_bar[k] = ls_mh[idx].mean(axis=0)
            l2_bar[k] = (ls_mh[idx]**2).mean(axis=0).sum()
            s2_bar[k] = ((As[idx] - Xs_comp[idx])**2).mean()
        else:
            X_bar[k] = Xs_mh.mean(axis=0)
            l_bar[k] = ls_mh.mean(axis=0)
            l2_bar[k] = (ls_mh**2).mean(axis=0).sum()
            s2_bar[k] = ((As - Xs_comp)**2).mean()
    # Initialize the latent variables
    Xs_mh = Xs_mh.copy()
    ls_mh = ls_mh.copy()
    zs_mh = zs_mh.copy().astype(np.int32)
    Xs_mhs = [Xs_mh]
    ls_mhs = [ls_mh]
    zs_mhs = [zs_mh]
    lks = []
    # Initialize the parameter history (copies: the arrays are updated in place)
    Fs = [F.copy()]
    mus = [mu.copy()]
    sigmas = [sigma.copy()]
    sigma_ls = [sigma_l.copy()]
    pis = [pi.copy()]
    for t in trange(n_iter):
        # MCMC step, with a tempering level decreasing toward 1
        temp = 1 + T / (t + 1)**0.6
        Xs_mh, ls_mh, zs_mh, _, rate_X, rate_l = mcmc.mh_cluster(
            As, theta, n_iter=n_mcmc, init=(Xs_mh, ls_mh, zs_mh),
            prop_X=prop_X, prop_l=prop_l, setting=setting, T=temp)
        if t % batch == 0:
            mode = [st.proj_V(F[k]) for k in range(K)]
            mu_old = mu.copy()
            # Align the F parameters of each cluster with the first cluster
            for k in range(1, K):
                perm, sign = st.greedy_permutation(mode[0], mode[k])
                F[k] = sign * F[k][:, perm]
                mu[k] = mu[k][perm]
            # Permute the X columns to best match the F parameter of their
            # cluster (always during the early iterations, and later whenever
            # the alignment above changed the parameters)
            if t < n_iter // 3 or norm(mu_old - mu) > 0:
                for i in range(n_samples):
                    perm, sign = st.greedy_permutation(mode[zs_mh[i]], Xs_mh[i])
                    Xs_mh[i] = sign * Xs_mh[i][:, perm]
                    ls_mh[i] = ls_mh[i][perm]
        # Update proposal variance for adaptive MCMC
        adaptive_X = 2 * (rate_X > optimal_rate) - 1
        prop_X = np.exp(np.log(prop_X) + 0.5 * adaptive_X / (1 + t)**0.6)
        adaptive_l = 2 * (rate_l > optimal_rate) - 1
        prop_l = np.exp(np.log(prop_l) + 0.5 * adaptive_l / (1 + t)**0.6)
        # Maximization step
        # Update the stochastic approximation coefficient
        if t < n_iter / 2:
            alpha = 1
        else:
            alpha = 1 / (t - n_iter / 2 + 1)**0.6
        # Update the exhaustive statistics
        Xs_comp = st.comp(Xs_mh, ls_mh)
        for k in range(K):
            idx = np.where(zs_mh == k)[0]
            if len(idx) > 0:
                X_bar_new = Xs_mh[idx].mean(axis=0)
                l_bar_new = ls_mh[idx].mean(axis=0)
                l2_bar_new = (ls_mh[idx]**2).mean(axis=0).sum()
                s2_bar_new = ((As[idx] - Xs_comp[idx])**2).mean()
                X_bar[k] = (1 - alpha) * X_bar[k] + alpha * X_bar_new
                l_bar[k] = (1 - alpha) * l_bar[k] + alpha * l_bar_new
                l2_bar[k] = (1 - alpha) * l2_bar[k] + alpha * l2_bar_new
                s2_bar[k] = (1 - alpha) * s2_bar[k] + alpha * s2_bar_new
        # Update the parameters of each cluster
        for k in range(K):
            sigma[k] = np.sqrt(s2_bar[k])
            F[k] = vmf.mle(X_bar[k], orth=True)
            mu[k] = l_bar[k]
            sigma_l[k] = np.sqrt((norm(mu[k])**2 + l2_bar[k]
                                  - 2 * (mu[k] * l_bar[k]).sum()) / p)
        pi = pi.copy()
        for k in range(K):
            pi[k] = (zs_mh == k).mean()
        theta = (F, mu, sigma, sigma_l, pi)
        # Store the current complete log-likelihood
        if setting == "gaussian":
            lks.append(model_cluster.log_lk(Xs_mh, ls_mh, zs_mh, As, theta,
                                            normalized=True))
        elif setting == "binary":
            lks.append(model_bin_cluster.log_lk(Xs_mh, ls_mh, zs_mh, As, theta,
                                                normalized=True))
        Fs.append(F.copy())
        mus.append(mu.copy())
        sigmas.append(sigma.copy())
        sigma_ls.append(sigma_l.copy())
        pis.append(pi.copy())
        # If history is True, store the values of Xs, ls and zs along the
        # Markov chain
        if history:
            Xs_mhs.append(Xs_mh.copy())
            ls_mhs.append(ls_mh.copy())
            zs_mhs.append(zs_mh.copy())
    result = {
        "theta": theta,
        "Xs_mh": Xs_mh,
        "ls_mh": ls_mh,
        "zs_mh": zs_mh,
        "history": {
            "lks": lks,
            "F": Fs,
            "mu": mus,
            "sigma": sigmas,
            "sigma_l": sigma_ls,
            "pi": pis,
            "Xs_mh": Xs_mhs,
            "ls_mh": ls_mhs,
            "zs_mh": zs_mhs,
        },
    }
    return result
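# Usage sketch for the mixture model (arbitrary hyperparameters): initialize
# with init_saem_grad_cluster, then run the clustered MCMC-SAEM with some
# initial tempering to help the labels mix.
def _demo_mcmc_saem_cluster(As, p=3, K=2):
    theta0, Xs0, ls0, zs0, _ = init_saem_grad_cluster(As, p, K, n_iter=5)
    result = mcmc_saem_cluster(As, Xs0, ls0, zs0, theta0, n_iter=200, T=5)
    return result["theta"], result["zs_mh"]  # final parameters and labels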
def mh(As, theta, n_iter, init=None, prop_X=0.01, prop_l=0.5, setting="gaussian"):
    """
    Metropolis-within-Gibbs sampler for the base model.
    - setting can be set to "binary" to handle binary networks
    - prop_X and prop_l are the proposal variances for X and l
    The function returns the final values of X and l, as well as the running
    likelihood and the chain acceptance rates.
    """
    F, mu, sigma, sigma_l = theta
    n_samples = As.shape[0]
    accepts_X = np.zeros((n_iter, n_samples))
    accepts_l = np.zeros((n_iter, n_samples))
    n, p = F.shape[-2:]
    if init is None:
        mode = st.proj_V(F)
        Xs = np.zeros((n_samples, n, p))
        ls = sigma_l * np.random.randn(n_samples, p)
        for i in range(n_samples):
            Xs[i] = mode
            ls[i] += mu
    else:
        Xs, ls = init
        Xs = Xs.copy()
        ls = ls.copy()
    if setting == "gaussian":
        current_log_lk = np.array([model.log_lk_partial(Xs[i], ls[i], As[i], theta)
                                   for i in range(n_samples)])
    elif setting == "binary":
        current_log_lk = np.array([model_bin.log_lk_partial(Xs[i], ls[i], As[i], theta)
                                   for i in range(n_samples)])
    for t in range(n_iter):
        for i in range(n_samples):
            # [X] Generate next move
            D = prop_X * np.random.randn(n, p)
            X2 = st.proj_V(Xs[i] + D)
            # [X] Compute the acceptance log-probability
            if setting == "gaussian":
                new_log_lk = model.log_lk_partial(X2, ls[i], As[i], theta)
            elif setting == "binary":
                new_log_lk = model_bin.log_lk_partial(X2, ls[i], As[i], theta)
            log_alpha = new_log_lk - current_log_lk[i]
            # [X] Accept or reject
            if np.log(np.random.rand()) < log_alpha:
                Xs[i] = X2
                current_log_lk[i] = new_log_lk
                accepts_X[t, i] = 1
            else:
                accepts_X[t, i] = 0
            # [l] Generate next move
            l2 = ls[i] + prop_l * np.random.randn(p)
            # [l] Compute the acceptance log-probability
            if setting == "gaussian":
                new_log_lk = model.log_lk_partial(Xs[i], l2, As[i], theta)
            elif setting == "binary":
                new_log_lk = model_bin.log_lk_partial(Xs[i], l2, As[i], theta)
            log_alpha = new_log_lk - current_log_lk[i]
            # [l] Accept or reject
            if np.log(np.random.rand()) < log_alpha:
                ls[i] = l2
                current_log_lk[i] = new_log_lk
                accepts_l[t, i] = 1
            else:
                accepts_l[t, i] = 0
    return Xs, ls, current_log_lk.sum(), accepts_X.mean(), accepts_l.mean()
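# Posterior-sampling sketch: given parameters theta (e.g. the output of
# mcmc_saem), draw approximate posterior samples of (X, lambda) for each
# network in As. The iteration count is arbitrary.
def _demo_mh(As, theta):
    Xs, ls, log_lk, rate_X, rate_l = mh(As, theta, n_iter=500)
    print(f"acceptance rates: X {rate_X:.3f}, lambda {rate_l:.3f}")
    return Xs, ls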
def mh_mask(A, mask, theta, n_iter, init=None, progress=True, prop_X=0.02):
    """
    Given a set of observed coefficients of A, runs a MCMC chain to sample
    from the remaining hidden coefficients and the latent variables (X, l).
    - mask is the set of unknown coefficients, given as two arrays of x and y
      indices
    - prop_X is the initial proposal variance
    The function returns the arrays of values of A, X and l along the MCMC.
    """
    A_init = A.copy()
    F, mu, sigma, sigma_l = theta
    mx, my = mask
    accepts_X = np.zeros(n_iter)
    n, p = F.shape
    batch = 50
    optimal_rate = 0.234
    if init is None:
        X = st.proj_V(F)
        l = mu.copy()
    else:
        A, X, l = init
    # Posterior standard deviation for lambda
    posterior_std_l = np.sqrt(1 / (1 / sigma**2 + 1 / sigma_l**2))
    # Per-column scaling of the X proposal by the column norms of F
    sv_F = np.array([norm(F[:, i]) for i in range(p)])
    lks = np.zeros(n_iter)
    A_mh = np.zeros((n_iter, n, n))
    X_mh = np.zeros((n_iter, n, p))
    l_mh = np.zeros((n_iter, p))
    for t in range(n_iter):
        lks[t] = model.log_lk_partial(X, l, A, theta)
        # [A] Sample the hidden coefficients around the current reconstruction
        A2 = A_init.copy()
        comp = st.comp_numba_single(X, l)
        for i in range(len(mx)):
            eps = sigma * np.sqrt(2) * np.random.randn()
            A2[mx[i], my[i]] = comp[mx[i], my[i]] + eps
        A = (A2 + A2.T) / 2
        # [X] Generate next move
        D = prop_X * np.random.randn(n, p) / sv_F
        X2 = st.proj_V(X + D)
        # [X] Compute the acceptance log-probability. The factor 100 sharpens
        # the acceptance ratio (low temperature), making the move nearly greedy.
        current_log_lk = model.log_lk_partial(X, l, A, theta)
        new_log_lk = model.log_lk_partial(X2, l, A, theta)
        log_alpha = (new_log_lk - current_log_lk) * 100
        # [X] Accept or reject
        if np.log(np.random.rand()) < log_alpha:
            X = X2
            current_log_lk = new_log_lk
            accepts_X[t] = 1
        else:
            accepts_X[t] = 0
        # [l] Set lambda to its conditional posterior mean
        v = np.diag(X.T @ A @ X)
        l = (posterior_std_l**2) * (v / sigma**2 + mu / sigma_l**2)
        A_mh[t] = A
        X_mh[t] = X
        l_mh[t] = l
        # Adaptively tune the acceptance rate
        if t % batch == 0 and t > 1:
            rate_X = accepts_X[max(0, t - batch):t + 1].mean()
            adaptive_X = 2 * (rate_X > optimal_rate) - 1
            prop_X = np.exp(np.log(prop_X) + 0.5 * adaptive_X / np.sqrt(1 + t))
    return A_mh, X_mh, l_mh, lks
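# Matrix-completion sketch: hide a few coefficients of a network A (the index
# arrays below are arbitrary placeholders), then recover them either by MAP
# optimization (map_mask above) or by posterior sampling (mh_mask).
def _demo_completion(A, theta):
    mx = np.array([0, 1, 2])
    my = np.array([3, 4, 5])
    mask = (mx, my)
    A_map, X_map, l_map, _ = map_mask(A, mask, theta, n_iter=200)
    A_mh, X_mh, l_mh, _ = mh_mask(A, mask, theta, n_iter=1000)
    # Posterior-mean estimate of the hidden coefficients, after burn-in
    A_mean = A_mh[500:].mean(axis=0)
    return A_map, A_mean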
def mh_cluster(As, theta, n_iter, init=None, prop_X=0.01, prop_l=0.5, T=1,
               setting="gaussian"):
    """
    Metropolis-within-Gibbs sampler for the mixture model.
    - setting can be set to "binary" to handle binary networks
    - prop_X and prop_l are the proposal variances for X and l
    - T is the level of tempering for the z variable (cluster labels)
    The function returns the final values of X, l and z, as well as the
    running likelihood and the chain acceptance rates.
    """
    F, mu, sigma, sigma_l, pi = theta
    K = len(pi)
    n, p = F.shape[1:]
    n_samples = As.shape[0]
    accepts_X = np.zeros((n_iter, n_samples))
    accepts_l = np.zeros((n_iter, n_samples))
    vmf_constants = np.array([spa.log_vmf(F[k]) for k in range(K)])
    if init is None:
        mode = [st.proj_V(F[k]) for k in range(K)]
        zs = np.array([np.random.randint(K) for _ in range(n_samples)]).astype(np.int32)
        Xs = np.zeros((n_samples, n, p))
        ls = np.zeros((n_samples, p))
        for i in range(n_samples):
            Xs[i] = mode[zs[i]]
            # Draw lambda from the prior of the sampled cluster
            ls[i] = mu[zs[i]] + sigma_l[zs[i]] * np.random.randn(p)
    else:
        Xs, ls, zs = init
        Xs = Xs.copy()
        ls = ls.copy()
        zs = zs.copy()
    if setting == "gaussian":
        current_log_lk = np.array([
            model_cluster.log_lk_partial(Xs[i], ls[i], zs[i], As[i], theta)
            for i in range(n_samples)])
    elif setting == "binary":
        current_log_lk = np.array([
            model_bin_cluster.log_lk_partial(Xs[i], ls[i], zs[i], As[i], theta)
            for i in range(n_samples)])
    for t in range(n_iter):
        for i in range(n_samples):
            # [z] Explicit sampling of z from its tempered conditional
            if setting == "gaussian":
                log_probs = (1 / T) * model_cluster.log_lk_partial_z(
                    Xs[i], ls[i], As[i], theta, constants=vmf_constants)
            elif setting == "binary":
                log_probs = (1 / T) * model_bin_cluster.log_lk_partial_z(
                    Xs[i], ls[i], As[i], theta, constants=vmf_constants)
            s = logsumexp(log_probs)
            probs = np.exp(log_probs - s)
            # Sample z manually from its cdf (np.random.choice is not
            # available in numba)
            cumulative_distribution = np.cumsum(probs)
            cumulative_distribution /= cumulative_distribution[-1]
            u = np.random.rand()
            zs[i] = np.searchsorted(cumulative_distribution, u, side="right")
            # [X] Generate next move
            D = prop_X * np.random.randn(n, p)
            X2 = st.proj_V(Xs[i] + D)
            # [X] Compute the acceptance log-probability
            if setting == "gaussian":
                new_log_lk = model_cluster.log_lk_partial(X2, ls[i], zs[i], As[i], theta)
            elif setting == "binary":
                new_log_lk = model_bin_cluster.log_lk_partial(X2, ls[i], zs[i], As[i], theta)
            log_alpha = new_log_lk - current_log_lk[i]
            # [X] Accept or reject
            if np.log(np.random.rand()) < log_alpha:
                Xs[i] = X2
                current_log_lk[i] = new_log_lk
                accepts_X[t, i] = 1
            else:
                accepts_X[t, i] = 0
            # [l] Generate next move
            l2 = ls[i] + prop_l * np.random.randn(p)
            # [l] Compute the acceptance log-probability
            if setting == "gaussian":
                new_log_lk = model_cluster.log_lk_partial(Xs[i], l2, zs[i], As[i], theta)
            elif setting == "binary":
                new_log_lk = model_bin_cluster.log_lk_partial(Xs[i], l2, zs[i], As[i], theta)
            log_alpha = new_log_lk - current_log_lk[i]
            # [l] Accept or reject
            if np.log(np.random.rand()) < log_alpha:
                ls[i] = l2
                current_log_lk[i] = new_log_lk
                accepts_l[t, i] = 1
            else:
                accepts_l[t, i] = 0
    return Xs, ls, zs, current_log_lk.sum(), accepts_X.mean(), accepts_l.mean()
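# Tempering sketch for the label sampler above: T > 1 flattens the
# conditional distribution of z, which helps the chain leave poor cluster
# assignments; T = 1 targets the exact conditional. Values are arbitrary.
def _demo_mh_cluster(As, theta):
    Xs, ls, zs, log_lk, rate_X, rate_l = mh_cluster(As, theta, n_iter=200, T=2)
    return zs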
def mala(As, theta, n_iter, init=None, progress=True, prop_X=0.01, prop_l=0.5,
         setting="gaussian"):
    """
    Metropolis-Adjusted Langevin Algorithm sampler for the base model.
    - setting can be set to "binary" to handle binary networks
    - prop_X and prop_l are the proposal variances for X and l
    The function returns the final values of X and l, as well as the running
    likelihood and the chain acceptance rates.
    """
    F, mu, sigma, sigma_l = theta
    n_samples = As.shape[0]
    accepts_X = np.zeros((n_iter, n_samples))
    accepts_l = np.zeros((n_iter, n_samples))
    n, p = F.shape[-2:]
    if init is None:
        mode = st.proj_V(F)
        Xs = np.array([mode.copy() for _ in range(n_samples)])
        ls = mu[None, :] + sigma_l * np.random.randn(n_samples, p)
    else:
        Xs, ls = init
        Xs = Xs.copy()
        ls = ls.copy()
    if setting == "gaussian":
        current_log_lk = np.array([model.log_lk_partial(Xs[i], ls[i], As[i], theta)
                                   for i in range(n_samples)])
    elif setting == "binary":
        current_log_lk = np.array([model_bin.log_lk_partial(Xs[i], ls[i], As[i], theta)
                                   for i in range(n_samples)])
    # Langevin drift step sizes
    step_X = 0.5 * prop_X**2
    step_l = 0.5 * prop_l**2
    # Cache the normalized gradients and drifts of the current states
    if setting == "gaussian":
        current_grad_X = np.array([model.log_lk_partial_grad_X(Xs[i], ls[i], As[i], theta)
                                   for i in range(n_samples)])
    elif setting == "binary":
        current_grad_X = np.array([model_bin.log_lk_partial_grad_X(Xs[i], ls[i], As[i], theta)
                                   for i in range(n_samples)])
    current_grad_X = np.array([g / norm(g) for g in current_grad_X])
    current_drift_X = np.array([st.proj_V(Xs[i] + step_X * current_grad_X[i])
                                for i in range(n_samples)])
    if setting == "gaussian":
        current_grad_lambda = np.array([model.log_lk_partial_grad_lambda(Xs[i], ls[i], As[i], theta)
                                        for i in range(n_samples)])
    elif setting == "binary":
        current_grad_lambda = np.array([model_bin.log_lk_partial_grad_lambda(Xs[i], ls[i], As[i], theta)
                                        for i in range(n_samples)])
    current_grad_lambda = np.array([g / norm(g) for g in current_grad_lambda])
    it = trange(n_iter) if progress else range(n_iter)
    for t in it:
        for i in range(n_samples):
            # [X] Generate next move: Langevin drift plus Gaussian noise,
            # projected back onto the manifold
            D = prop_X * np.random.randn(n, p)
            grad_X = current_grad_X[i]
            drift_X = current_drift_X[i]
            D += step_X * grad_X
            X2 = st.proj_V(Xs[i] + D)
            if setting == "gaussian":
                grad_X2 = model.log_lk_partial_grad_X(X2, ls[i], As[i], theta)
            elif setting == "binary":
                grad_X2 = model_bin.log_lk_partial_grad_X(X2, ls[i], As[i], theta)
            grad_X2 = grad_X2 / norm(grad_X2)
            drift_X2 = st.proj_V(X2 + step_X * grad_X2)
            # [X] Correction term for the asymmetric proposal
            mala_jump = (-st.discr(Xs[i], drift_X2)
                         + st.discr(X2, drift_X)) / (2 * prop_X**2)
            # [X] Compute the acceptance log-probability
            if setting == "gaussian":
                new_log_lk = model.log_lk_partial(X2, ls[i], As[i], theta)
            elif setting == "binary":
                new_log_lk = model_bin.log_lk_partial(X2, ls[i], As[i], theta)
            log_alpha = new_log_lk - current_log_lk[i] + mala_jump
            # [X] Accept or reject
            if np.log(np.random.rand()) < log_alpha:
                Xs[i] = X2
                current_log_lk[i] = new_log_lk
                accepts_X[t, i] = 1
                current_grad_X[i] = grad_X2
                current_drift_X[i] = drift_X2
                # The lambda gradient depends on X: refresh the cache
                if setting == "gaussian":
                    g = model.log_lk_partial_grad_lambda(Xs[i], ls[i], As[i], theta)
                elif setting == "binary":
                    g = model_bin.log_lk_partial_grad_lambda(Xs[i], ls[i], As[i], theta)
                current_grad_lambda[i] = g / norm(g)
            else:
                accepts_X[t, i] = 0
            # [l] Generate next move
            l2 = ls[i] + prop_l * np.random.randn(p)
            grad_l = current_grad_lambda[i]
            l2 += step_l * grad_l
            if setting == "gaussian":
                grad_l2 = model.log_lk_partial_grad_lambda(Xs[i], l2, As[i], theta)
            elif setting == "binary":
                grad_l2 = model_bin.log_lk_partial_grad_lambda(Xs[i], l2, As[i], theta)
            grad_l2 = grad_l2 / norm(grad_l2)
            # [l] Correction term for the asymmetric proposal
            mala_jump = (-norm(ls[i] - l2 - step_l * grad_l2)**2
                         + norm(l2 - ls[i] - step_l * grad_l)**2) / (2 * prop_l**2)
            # [l] Compute the acceptance log-probability
            if setting == "gaussian":
                new_log_lk = model.log_lk_partial(Xs[i], l2, As[i], theta)
            elif setting == "binary":
                new_log_lk = model_bin.log_lk_partial(Xs[i], l2, As[i], theta)
            log_alpha = new_log_lk - current_log_lk[i] + mala_jump
            # [l] Accept or reject
            if np.log(np.random.rand()) < log_alpha:
                ls[i] = l2
                current_log_lk[i] = new_log_lk
                accepts_l[t, i] = 1
                current_grad_lambda[i] = grad_l2
                # The X gradient depends on lambda: refresh the cache
                if setting == "gaussian":
                    g = model.log_lk_partial_grad_X(Xs[i], ls[i], As[i], theta)
                elif setting == "binary":
                    g = model_bin.log_lk_partial_grad_X(Xs[i], ls[i], As[i], theta)
                current_grad_X[i] = g / norm(g)
                current_drift_X[i] = st.proj_V(Xs[i] + step_X * current_grad_X[i])
            else:
                accepts_l[t, i] = 0
        if progress:
            it.set_postfix({"log_lk": current_log_lk.sum()})
    if progress:
        print("Acceptance rates", accepts_X.mean(), accepts_l.mean())
    return Xs, ls, current_log_lk.sum(), accepts_X.mean(), accepts_l.mean()