Example 1
def map_to_ground_truth(Xs_mh, ls_mh, theta, theta0):
    F, mu, sigma, sigma_l = theta
    F0, mu0, sigma0, sigma_l0 = theta0
    m, s = st.greedy_permutation(st.proj_V(F0), st.proj_V(F))
    F = s * F[:, m]
    mu = mu[m]
    Xs_mh = s * Xs_mh[:, :, m]
    ls_mh = ls_mh[:, m]
    return Xs_mh, ls_mh, (F, mu, sigma, sigma_l), m, s
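A short usage sketch (all inputs hypothetical): after an estimation run on simulated data, the estimate is aligned with the ground truth so that columns can be compared directly.

# Hypothetical inputs: theta_hat and the chains Xs_mh, ls_mh come from an
# MCMC-SAEM run, and theta0 = (F0, mu0, sigma0, sigma_l0) is the ground truth
# used to simulate the data.
Xs_al, ls_al, theta_al, perm, signs = map_to_ground_truth(Xs_mh, ls_mh, theta_hat, theta0)
F_al, mu_al, _, _ = theta_al
# The columns of F_al and the entries of mu_al now match F0 and mu0 up to the
# estimation error; the chosen column permutation and sign flips are returned
# in perm and signs.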
Example 2
def init_saem_grad(As, p, n_iter=10, step=0.1, setting="gaussian"):
    global model
    if setting == "binary":
        model = model_bin
    n_samples, n, _ = As.shape
    theta, _, _ = init_saem(As, p)
    F, mu, sigma, sigma_l = theta
    sigma = 1
    sigma_l = 1
    mode = st.proj_V(F)
    Xs = np.array([mode.copy() for _ in range(n_samples)])
    ls = mu[None, :] + sigma_l * np.random.randn(n_samples, p)
    lks = []
    it = trange(n_iter)
    prop_l = 1
    current_log_lk = np.array([
        model.log_lk_partial(Xs[i], ls[i], As[i], theta)
        for i in range(n_samples)
    ])
    for t in it:
        mode = st.proj_V(F)
        posterior_std_l = np.sqrt(1 / (1 / sigma**2 + 1 / sigma_l**2))
        for _ in range(10):
            for i in range(n_samples):
                if t % 5 == 0:
                    m, s = st.greedy_permutation(mode, Xs[i])
                    Xs[i] = s * Xs[i][:, m]
                    ls[i] = ls[i][m]

                grad_X = model.log_lk_partial_grad_X(Xs[i], ls[i], As[i],
                                                     theta)
                grad_X = grad_X / norm(grad_X)
                Xs[i] = st.proj_V(Xs[i] + step * grad_X)

                # [l] Generate next move
                l2 = ls[i] + prop_l * np.random.randn(p)
                # [l] Compute the acceptance log-probability
                new_log_lk = model.log_lk_partial(Xs[i], l2, As[i], theta)
                log_alpha = new_log_lk - current_log_lk[i]
                # [l] Accept or reject
                if np.log(np.random.rand()) < log_alpha:
                    ls[i] = l2
                    current_log_lk[i] = new_log_lk

        F = vmf.mle(Xs.mean(axis=0))
        mu = ls.mean(axis=0)
        sigma = np.sqrt(((As - st.comp_numba_many(Xs, ls))**2).mean())
        sigma_l = np.sqrt(((ls - mu)**2).mean())
        theta = (F, mu, sigma, sigma_l)
        lks.append(model.log_lk(Xs, ls, As, theta, normalized=True))
        it.set_postfix({"lk": lks[-1]})
    return theta, Xs, ls, lks
Example 3
def map_to_ground_truth_cluster(Xs_mh, ls_mh, zs_mh, theta, theta0, M=None):
    F, mu, sigma, sigma_l, pi = theta
    F0, mu0, sigma0, sigma_l0, pi0 = theta0
    n_samples, n, p = Xs_mh.shape
    K = len(pi)
    ms = np.zeros((K, K, p), dtype=np.int32)
    ss = np.zeros((K, K, p))
    E = np.zeros((K, K))
    for k in range(K):
        for l in range(K):
            X = st.proj_V(F0[k])
            Y = st.proj_V(F[l])
            ms[k, l], ss[k, l] = st.greedy_permutation(X, Y)
            E[k, l] += st.discr(F0[k], ss[k, l] * F[l][:, ms[k, l]])
            E[k, l] += np.linalg.norm(mu0[k] - mu[l][ms[k, l]])

    if M is None:
        M = np.zeros(K, dtype=np.int32)
        iM = np.zeros(K, dtype=np.int32)
        for _ in range(K):
            # Greedily pair the remaining clusters with the smallest alignment error
            k, l = np.unravel_index(E.argmin(), E.shape)
            E[k, :] = np.inf
            E[:, l] = np.inf
            M[k] = l
            iM[l] = k
    else:
        iM = np.zeros(K, dtype=np.int32)
        for k, l in enumerate(M):
            iM[l] = k

    pi = pi[M]
    ms = [ms[k, M[k]] for k in range(K)]
    ss = [ss[k, M[k]] for k in range(K)]

    F_ = F.copy()
    mu_ = mu.copy()
    for k in range(K):
        m = ms[k]
        s = ss[k]
        F[k] = s * F_[M[k]][:, m]
        mu[k] = mu_[M[k]][m]

    zs_mh = np.array([iM[zs_mh[i]] for i in range(n_samples)])
    for i in range(n_samples):
        m = ms[zs_mh[i]]
        s = ss[zs_mh[i]]
        Xs_mh[i] = s * Xs_mh[i][:, m]
        ls_mh[i] = ls_mh[i][m]
    return Xs_mh, ls_mh, zs_mh, (F, mu, sigma, sigma_l, pi), ms, ss
Example 4
def sample_von_mises_fisher(F,
                            n_iter=100,
                            burn=100,
                            stride=10,
                            progress=False):
    """
    Sample from the vMF distribution using an adaptive Metropolis-Hastings algorithm.
    """
    # Initialize at the mode of the distribution to ensure fast convergence.
    X = st.proj_V(F)
    s = np.linalg.svd(F, compute_uv=False)
    n, p = F.shape
    Xs = np.zeros((n_iter, n, p))
    current_lk = (X * F).sum()
    accepts = 0

    # Adaptive parameters: the proposal variance is tuned along the MCMC
    std = 0.4
    batch = 100
    accepts_hist = np.zeros(batch)
    optimal_rate = 0.234

    total_steps = burn + n_iter * stride
    it = trange(total_steps) if progress else range(total_steps)
    for t in it:
        # The proposal adds Gaussian noise in the ambient space (scaled by the
        # inverse singular values of F) and projects back onto the manifold.
        D = std * np.random.randn(n, p) / s
        X2 = st.proj_V(X + D)
        new_lk = (X2 * F).sum()

        if new_lk - current_lk > np.log(np.random.rand()):
            X = X2
            current_lk = new_lk
            accepts += 1
            accepts_hist[t % batch] = 1
        else:
            accepts_hist[t % batch] = 0

        if t >= burn and (t - burn) % stride == 0:
            Xs[(t - burn) // stride] = X

        if t % batch == 0 and t > 0:
            adapt = 2 * (accepts_hist.mean() > optimal_rate) - 1
            std = np.exp(np.log(std) + adapt / np.sqrt(t))

    if progress: print(f"VMF Acceptance rate: {accepts/(burn+n_iter*stride)}")
    return Xs
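A minimal usage sketch, assuming numpy and the st module used above are importable; the parameter matrix F below is synthetic, with its scale acting as the concentration.

import numpy as np

# Synthetic vMF parameter: an (n, p) matrix with orthonormal columns scaled by 50.
n, p = 20, 3
rng = np.random.default_rng(0)
F = 50 * np.linalg.qr(rng.standard_normal((n, p)))[0]

# Draw 200 approximately independent samples (burn-in 100, thinning 10).
Xs = sample_von_mises_fisher(F, n_iter=200, burn=100, stride=10, progress=True)
print(Xs.shape)                                             # (200, 20, 3)
print(np.allclose(Xs[0].T @ Xs[0], np.eye(p), atol=1e-6))   # samples stay on the Stiefel manifold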
Example 5
def map_mask(A, mask, theta, n_iter):
    """
    Given the observed coefficients of A, computes the MAP estimator of the
    remaining hidden coefficients and of the latent variables (X, l).
    mask is the set of unknown coefficients, given as two arrays of x and y indices.
    The function returns the final values of A, X and l, together with the
    partial log-likelihood along the iterations.
    """
    F, mu, sigma, sigma_l = theta
    mx, my = mask
    n, p = F.shape

    A = A.copy()
    X = st.proj_V(F)
    l = mu.copy()
    # Posterior standard deviation for lambda:
    posterior_std_l = np.sqrt(1/(1/sigma**2 + 1/sigma_l**2))
    lks = np.zeros(n_iter)
    
    it = range(n_iter)
    for t in it:
        step = 1/(2*t+1)
        
        # [A] Explicit maximum on A
        comp = st.comp_numba_single(X, l)
        for i in range(len(mx)):
            A[mx[i], my[i]] = comp[mx[i],my[i]]
            A[my[i], mx[i]] = comp[mx[i],my[i]]
        
        # [X] Sample on X
        grad_X = model.log_lk_partial_grad_X(X, l, A, theta)
        grad_X = grad_X/norm(grad_X)
        X = st.proj_V(X + step*grad_X)

        # [l] Explicit maximum on lambda
        v = np.diag(X.T@A@X)
        l = (posterior_std_l**2)*(v/sigma**2 + mu/sigma_l**2)
        
        lks[t] = model.log_lk_partial(X, l, A, theta)
        
    return A, X, l, lks
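A sketch of how the mask argument is typically built: two index arrays over the hidden upper-triangular entries (the function writes both symmetric coefficients internally). The 30% missing rate, A and theta are hypothetical.

import numpy as np

rng = np.random.default_rng(0)
n = 15                                   # hypothetical graph size

# Hide 30% of the upper-triangular coefficients.
iu, ju = np.triu_indices(n, k=1)
hidden = rng.random(len(iu)) < 0.3
mask = (iu[hidden], ju[hidden])

# With A an observed network and theta = (F, mu, sigma, sigma_l) a fitted
# parameter tuple (both hypothetical here):
# A_hat, X_hat, l_hat, lks = map_mask(A, mask, theta, n_iter=200)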
Example 6
def init_saem(As, p):
    n_samples, n, _ = As.shape
    ls = np.zeros((n_samples, p))
    Xs = np.zeros((n_samples, n, p))
    # Compute the eigendecomposition of each adjacency matrix
    for i in range(n_samples):
        # As[i] is assumed symmetric, so eigh returns real eigenvalues and eigenvectors
        ev, u = np.linalg.eigh(As[i])
        idx = (-np.abs(ev)).argsort()[:p]
        ls[i] = ev[idx]
        Xs[i] = u[:, idx]
    # Average on the eigenvectors on the Stiefel manifold
    mode = st.proj_V(Xs.mean(axis=0))
    # Permute the eigenvectors to align them with the computed mode
    for i in range(n_samples):
        m, s = st.greedy_permutation(mode, Xs[i])
        Xs[i] = s * Xs[i][:, m]
        ls[i] = ls[i][m]
    # Initialize the parameters from the resulting eigenvectors and eigenvalues
    F = vmf.mle(Xs.mean(axis=0))
    mu = ls.mean(axis=0)
    sigma = (As - st.comp_numba_many(Xs, ls)).std()
    sigma_l = (ls - mu).std()
    return (F, mu, sigma, sigma_l), Xs, ls
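A usage sketch on synthetic data, assuming numpy and the modules used above (st, vmf) are importable; the adjacency matrices are noisy symmetric rank-p matrices sharing the same eigenstructure.

import numpy as np

rng = np.random.default_rng(0)
n_samples, n, p = 30, 15, 3

# Synthetic symmetric matrices A_i = X0 diag(l0) X0^T + noise.
X0 = np.linalg.qr(rng.standard_normal((n, p)))[0]
l0 = np.array([5.0, 3.0, 1.5])
As = np.stack([X0 @ np.diag(l0) @ X0.T + 0.05 * rng.standard_normal((n, n))
               for _ in range(n_samples)])
As = (As + As.transpose(0, 2, 1)) / 2    # keep each matrix exactly symmetric

theta, Xs, ls = init_saem(As, p)
F, mu, sigma, sigma_l = theta
print(F.shape, mu.round(2), sigma, sigma_l)   # (15, 3), ~l0 up to permutation, noise scales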
Example 7
def mcmc_saem(As,
              Xs_mh,
              ls_mh,
              theta,
              n_iter=100,
              mala=False,
              prop_X=0.01,
              prop_l=0.5,
              n_mcmc=20,
              history=True,
              setting="gaussian"):
    """
    Run the MCMC-SAEM algorithm for the base model.
    - Xs_mh, ls_mh, theta: initial latent variables and parameters (e.g. from init_saem)
    - mala: if True, use the MALA sampler instead of Metropolis-Hastings for the MCMC step
    - prop_X, prop_l: initial proposal variances, tuned adaptively along the run
    - setting can be set to "binary" to handle binary networks
    The function returns a dictionary with the final theta, the final latent
    variables and the history of the parameters and log-likelihoods.
    """
    F, mu, sigma, sigma_l = theta

    optimal_rate = 0.234
    batch = 5  # SAEM iterations between two column permutation steps
    # n_mcmc (MCMC steps per SAEM iteration) is taken from the function argument
    n_samples, n, p = Xs_mh.shape

    # Initialize the exhaustive statistics
    Xs_comp = st.comp(Xs_mh, ls_mh)
    X_bar = Xs_mh.mean(axis=0)
    l_bar = ls_mh.mean(axis=0)
    l2_bar = (ls_mh**2).mean(axis=0).sum()
    s2_bar = ((As - Xs_comp)**2).mean()

    # Initialize the latent variables
    Xs_mhs = [Xs_mh]
    ls_mhs = [ls_mh]
    Xs_mh = Xs_mh.copy()
    ls_mh = ls_mh.copy()
    lks = []

    # Initialize the parameter history
    Fs = [F]
    mus = [mu]
    sigmas = [sigma]
    sigma_ls = [sigma_l]

    for n in trange(n_iter):
        # MCMC step: use Metropolis-Hastings or MALA
        if mala:
            Xs_mh, ls_mh, lk, rate_X, rate_l = mcmc.mala(As,
                                                         theta,
                                                         n_iter=n_mcmc,
                                                         init=(Xs_mh, ls_mh),
                                                         progress=False,
                                                         prop_X=prop_X,
                                                         prop_l=prop_l,
                                                         setting=setting)
        else:
            Xs_mh, ls_mh, lk, rate_X, rate_l = mcmc.mh(As,
                                                       theta,
                                                       n_iter=n_mcmc,
                                                       init=(Xs_mh, ls_mh),
                                                       prop_X=prop_X,
                                                       prop_l=prop_l,
                                                       setting=setting)

        if n % batch == 0 and n < n_iter / 3:
            mode = st.proj_V(F)
            for i in range(n_samples):
                perm, sign = st.greedy_permutation(mode, Xs_mh[i])
                Xs_mh[i] = sign * Xs_mh[i][:, perm]
                ls_mh[i] = ls_mh[i][perm]

        # Update proposal variance for adaptive MCMC
        adaptive_X = 2 * (rate_X > optimal_rate) - 1
        prop_X = np.exp(np.log(prop_X) + 0.5 * adaptive_X / (1 + n)**0.6)
        adaptive_l = 2 * (rate_l > optimal_rate) - 1
        prop_l = np.exp(np.log(prop_l) + 0.5 * adaptive_l / (1 + n)**0.6)

        # Maximization step

        # Update the stochastic approximation coefficient
        if n < n_iter / 2:
            alpha = 1
        else:
            alpha = 1 / (n - n_iter / 2 + 1)**0.6

        # Update the exhaustive statistics
        Xs_comp = st.comp(Xs_mh, ls_mh)
        X_bar = (1 - alpha) * X_bar + alpha * Xs_mh.mean(axis=0)
        l_bar = (1 - alpha) * l_bar + alpha * ls_mh.mean(axis=0)
        l2_bar = (1 - alpha) * l2_bar + alpha * (ls_mh**2).mean(axis=0).sum()
        s2_bar = (1 - alpha) * s2_bar + alpha * ((As - Xs_comp)**2).mean()

        # Update sigma
        sigma = np.sqrt(s2_bar)
        # Update F
        F = vmf.mle(X_bar, orth=True)
        # Update mu
        mu = l_bar
        # Update sigma_l
        sigma_l = np.sqrt((norm(mu)**2 + l2_bar - 2 * (mu * l_bar).sum()) / p)

        theta = (F, mu, sigma, sigma_l)

        # Store the current complete log-likelihood
        if setting == "gaussian":
            lks.append(model.log_lk(Xs_mh, ls_mh, As, theta, normalized=True))
        elif setting == "binary":
            lks.append(
                model_bin.log_lk(Xs_mh, ls_mh, As, theta, normalized=True))

        Fs.append(F)
        mus.append(mu)
        sigmas.append(sigma)
        sigma_ls.append(sigma_l)

        # if history is True, store the values of Xs and ls along the Markov chain
        if history:
            Xs_mhs.append(Xs_mh.copy())
            ls_mhs.append(ls_mh.copy())

    result = {
        "theta": theta,
        "Xs_mh": Xs_mh,
        "ls_mh": ls_mh,
        "history": {
            "lks": lks,
            "F": Fs,
            "mu": mus,
            "sigma": sigmas,
            "sigma_l": sigma_ls,
            "Xs_mh": Xs_mhs,
            "ls_mh": ls_mhs
        }
    }

    return result
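In practice the spectral initialization and the SAEM loop are chained; a sketch with As and p as in the init_saem example above (the modules st, vmf, mcmc and model are assumed importable):

# Initialize the parameters and latent variables from the eigendecompositions,
# then refine them with the MCMC-SAEM loop.
theta0, Xs0, ls0 = init_saem(As, p)
result = mcmc_saem(As, Xs0, ls0, theta0, n_iter=100, n_mcmc=20, history=False)

F_hat, mu_hat, sigma_hat, sigma_l_hat = result["theta"]
lks = result["history"]["lks"]           # complete-data log-likelihood along the run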
Example 8
def init_saem_grad_cluster(As, p, K, n_iter=10, step=0.1, setting="gaussian"):
    n_samples, n, _ = As.shape
    kmeans = KMeans(n_clusters=K).fit(As.reshape(n_samples, -1))
    zs = kmeans.labels_

    F = np.zeros((K, n, p))
    mu = np.zeros((K, p))
    sigma = np.zeros(K)
    sigma_l = np.zeros(K)
    pi = np.bincount(zs) / n_samples
    for k in range(K):
        idx = np.where(zs == k)[0]
        (F[k], mu[k], sigma[k], sigma_l[k]), _, _ = init_saem(As[idx], p)

    mode = [st.proj_V(F[k]) for k in range(K)]
    Xs = np.array([mode[zs[i]].copy() for i in range(n_samples)])
    ls = mu[zs]

    lks = []
    prop_l = 1
    it = trange(n_iter)
    current_log_lk = np.array([
        model.log_lk_partial(
            Xs[i], ls[i], As[i],
            (F[zs[i]], mu[zs[i]], sigma[zs[i]], sigma_l[zs[i]]))
        for i in range(n_samples)
    ])
    for t in it:
        mode = [st.proj_V(F[k]) for k in range(K)]
        posterior_std_l = np.sqrt(1 / (1 / sigma**2 + 1 / sigma_l**2))
        for _ in range(10):
            for i in range(n_samples):
                k = zs[i]
                theta = (F[k], mu[k], sigma[k], sigma_l[k])
                if t % 5 == 0:
                    # Align each sample with the mode of its own cluster
                    m, s = st.greedy_permutation(mode[k], Xs[i])
                    Xs[i] = s * Xs[i][:, m]
                    ls[i] = ls[i][m]

                if setting == "gaussian":
                    grad_X = model.log_lk_partial_grad_X(
                        Xs[i], ls[i], As[i], theta)
                elif setting == "binary":
                    grad_X = model_bin.log_lk_partial_grad_X(
                        Xs[i], ls[i], As[i], theta)
                grad_X = grad_X / norm(grad_X)
                Xs[i] = st.proj_V(Xs[i] + step * grad_X)

                # [l] Generate next move
                l2 = ls[i] + prop_l * np.random.randn(p)
                # [l] Compute the acceptance log-probability
                if setting == "gaussian":
                    new_log_lk = model.log_lk_partial(Xs[i], l2, As[i], theta)
                elif setting == "binary":
                    new_log_lk = model_bin.log_lk_partial(
                        Xs[i], l2, As[i], theta)
                log_alpha = new_log_lk - current_log_lk[i]
                # [l] Accept or reject
                if np.log(np.random.rand()) < log_alpha:
                    ls[i] = l2
                    current_log_lk[i] = new_log_lk

        for k in range(K):
            idx = np.where(zs == k)[0]
            F[k] = vmf.mle(Xs[idx].mean(axis=0))
            mu[k] = ls[idx].mean(axis=0)
            sigma[k] = np.sqrt(((As[idx] -
                                 st.comp_numba_many(Xs[idx], ls[idx]))**2).mean())
            sigma_l[k] = np.sqrt(((ls[idx] - mu[k])**2).mean())

        if setting == "gaussian":
            lks.append(
                model_cluster.log_lk(Xs, ls, zs, As,
                                     (F, mu, sigma, sigma_l, pi),
                                     normalized=True))
        elif setting == "binary":
            lks.append(
                model_bin_cluster.log_lk(Xs, ls, zs, As,
                                         (F, mu, sigma, sigma_l, pi),
                                         normalized=True))

        it.set_postfix({"lk": lks[-1]})
    return (F, mu, sigma, sigma_l, pi), Xs, ls, zs, lks
Example 9
def mcmc_saem_cluster(As,
                      Xs_mh,
                      ls_mh,
                      zs_mh,
                      theta,
                      n_iter=100,
                      prop_X=0.01,
                      prop_l=0.5,
                      n_mcmc=20,
                      history=True,
                      setting="gaussian",
                      T=0):
    """
    Run the MCMC-SAEM algorithm for the mixture model.
    - Xs_mh, ls_mh, zs_mh, theta: initial latent variables and parameters
      (e.g. from init_saem_grad_cluster)
    - setting can be set to "binary" to handle binary networks
    - T: tempering level for the cluster labels (the effective temperature decays
      towards 1 along the run)
    The function returns a dictionary with the final theta, the final latent
    variables and the history of the parameters and log-likelihoods.
    """
    F, mu, sigma, sigma_l, pi = theta

    optimal_rate = 0.234
    batch = 5
    n_samples, n, p = Xs_mh.shape
    K = len(pi)

    # Initialize the exhaustive statistics for each cluster
    Xs_comp = st.comp(Xs_mh, ls_mh)
    X_bar = np.zeros((K, n, p))
    l_bar = np.zeros((K, p))
    l2_bar = np.zeros(K)
    s2_bar = np.zeros(K)
    for k in range(K):
        idx = np.where(zs_mh == k)[0]
        # Check that the cluster is not empty
        if len(idx) > 0:
            X_bar[k] = Xs_mh[idx].mean(axis=0)
            l_bar[k] = ls_mh[idx].mean(axis=0)
            l2_bar[k] = (ls_mh[idx]**2).mean(axis=0).sum()
            s2_bar[k] = ((As[idx] - Xs_comp[idx])**2).mean()
        else:
            X_bar[k] = Xs_mh.mean(axis=0)
            l_bar[k] = ls_mh.mean(axis=0)
            l2_bar[k] = (ls_mh**2).mean(axis=0).sum()
            s2_bar[k] = ((As - Xs_comp)**2).mean()

    # Initialize the latent variables
    Xs_mh = Xs_mh.copy()
    ls_mh = ls_mh.copy()
    zs_mh = zs_mh.copy().astype(np.int32)
    Xs_mhs = [Xs_mh]
    ls_mhs = [ls_mh]
    zs_mhs = [zs_mh]
    lks = []

    # Initialize the parameter history
    Fs = [F]
    mus = [mu]
    sigmas = [sigma]
    sigma_ls = [sigma_l]
    pis = [pi]

    for n in trange(n_iter):
        # MCMC step
        temp = 1 + T / (n + 1)**0.6
        Xs_mh, ls_mh, zs_mh, _, rate_X, rate_l = mcmc.mh_cluster(
            As,
            theta,
            n_iter=n_mcmc,
            init=(Xs_mh, ls_mh, zs_mh),
            prop_X=prop_X,
            prop_l=prop_l,
            setting=setting,
            T=temp)

        if n % batch == 0:
            mode = [st.proj_V(F[k]) for k in range(K)]
            mu_old = mu.copy()
            # Align the F parameters of each cluster to the first cluster.
            for k in range(1, K):
                perm, sign = st.greedy_permutation(mode[0], mode[k])
                F[k] = sign * F[k][:, perm]
                mu[k] = mu[k][perm]
            # Permute the X columns to best match the F parameter of their cluster
            if n < n_iter // 3 or norm(mu_old - mu) > 0:
                for i in range(n_samples):
                    perm, sign = st.greedy_permutation(mode[zs_mh[i]],
                                                       Xs_mh[i])
                    Xs_mh[i] = sign * Xs_mh[i][:, perm]
                    ls_mh[i] = ls_mh[i][perm]

        # Update proposal variance for adaptive MCMC
        adaptive_X = 2 * (rate_X > optimal_rate) - 1
        prop_X = np.exp(np.log(prop_X) + 0.5 * adaptive_X / (1 + n)**0.6)
        adaptive_l = 2 * (rate_l > optimal_rate) - 1
        prop_l = np.exp(np.log(prop_l) + 0.5 * adaptive_l / (1 + n)**0.6)

        # Maximization step

        # Update the stochastic approximation coefficient
        if n < n_iter / 2:
            alpha = 1
        else:
            alpha = 1 / (n - n_iter / 2 + 1)**0.6

        # Update the exhaustive statistics
        Xs_comp = st.comp(Xs_mh, ls_mh)
        for k in range(K):
            idx = np.where(zs_mh == k)[0]
            if len(idx) > 0:
                X_bar_new = Xs_mh[idx].mean(axis=0)
                l_bar_new = ls_mh[idx].mean(axis=0)
                l2_bar_new = (ls_mh[idx]**2).mean(axis=0).sum()
                s2_bar_new = ((As[idx] - Xs_comp[idx])**2).mean()

                X_bar[k] = (1 - alpha) * X_bar[k] + alpha * X_bar_new
                l_bar[k] = (1 - alpha) * l_bar[k] + alpha * l_bar_new
                l2_bar[k] = (1 - alpha) * l2_bar[k] + alpha * l2_bar_new
                s2_bar[k] = (1 - alpha) * s2_bar[k] + alpha * s2_bar_new

        # Update the parameters for each cluster
        for k in range(K):
            sigma[k] = np.sqrt(s2_bar[k])
            F[k] = vmf.mle(X_bar[k], orth=True)
            mu[k] = l_bar[k]
            sigma_l[k] = np.sqrt((norm(mu[k])**2 + l2_bar[k] - 2 *
                                  (mu[k] * l_bar[k]).sum()) / p)
        pi = pi.copy()
        for k in range(K):
            pi[k] = (zs_mh == k).mean()

        theta = (F, mu, sigma, sigma_l, pi)

        # Store the current complete log-likelihood
        if setting == "gaussian":
            lks.append(
                model_cluster.log_lk(Xs_mh,
                                     ls_mh,
                                     zs_mh,
                                     As,
                                     theta,
                                     normalized=True))
        elif setting == "binary":
            lks.append(
                model_bin_cluster.log_lk(Xs_mh,
                                         ls_mh,
                                         zs_mh,
                                         As,
                                         theta,
                                         normalized=True))

        Fs.append(F)
        mus.append(mu)
        sigmas.append(sigma)
        sigma_ls.append(sigma_l)
        pis.append(pi)

        # if history is True, store the values of Xs and ls along the Markov chain
        if history:
            Xs_mhs.append(Xs_mh.copy())
            ls_mhs.append(ls_mh.copy())
            zs_mhs.append(zs_mh.copy())

    result = {
        "theta": theta,
        "Xs_mh": Xs_mh,
        "ls_mh": ls_mh,
        "zs_mh": zs_mh,
        "history": {
            "lks": lks,
            "F": Fs,
            "mu": mus,
            "sigma": sigmas,
            "sigma_l": sigma_ls,
            "pi": pis,
            "Xs_mh": Xs_mhs,
            "ls_mh": ls_mhs,
            "zs_mh": zs_mhs
        }
    }

    return result
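The mixture variant follows the same pattern, chaining the cluster initialization with the tempered SAEM loop; a sketch with As and p as in the earlier synthetic example and a hypothetical number of clusters K:

K = 2
theta0, Xs0, ls0, zs0, _ = init_saem_grad_cluster(As, p, K, n_iter=10)
result = mcmc_saem_cluster(As, Xs0, ls0, zs0, theta0, n_iter=100, T=1)

F_hat, mu_hat, sigma_hat, sigma_l_hat, pi_hat = result["theta"]
labels = result["zs_mh"]                 # final cluster assignment of each network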
Example 10
def mh(As, theta, n_iter, init=None, prop_X=0.01, prop_l=0.5, setting="gaussian"):
    """
    Metropolis within Gibbs sampler for the base model.
    - setting can be set to "binary" to handle binary networks
    - prop_X and prop_l are the proposal variances for X and l
    The function returns the final values of X and l, as well as the running likelihood
    and the chain acceptance rates.
    """
    F, mu, sigma, sigma_l = theta
    n_samples = As.shape[0]
    accepts_X = np.zeros((n_iter, n_samples))
    accepts_l = np.zeros((n_iter, n_samples))
    n, p = F.shape[-2:]
    if init is None:
        mode = st.proj_V(F)
        Xs = np.zeros((n_samples, n, p))
        ls = sigma_l*np.random.randn(n_samples, p)
        for i in range(n_samples):
            Xs[i] = mode
            ls[i] += mu
    else:
        Xs, ls = init
        Xs = Xs.copy()
        ls = ls.copy()
    
    if setting=="gaussian":
        current_log_lk = np.array([model.log_lk_partial(Xs[i], ls[i], As[i], theta) for i in range(n_samples)])
    elif setting=="binary":
        current_log_lk = np.array([model_bin.log_lk_partial(Xs[i], ls[i], As[i], theta) for i in range(n_samples)])
    
    for t in range(n_iter):
        for i in range(n_samples):
            # [X] Generate next move
            D = prop_X*np.random.randn(n,p)
            X2 = st.proj_V(Xs[i] + D)
            # [X] Compute the acceptance log-probability
            if setting=="gaussian":
                new_log_lk = model.log_lk_partial(X2, ls[i], As[i], theta)
            elif setting=="binary":
                new_log_lk = model_bin.log_lk_partial(X2, ls[i], As[i], theta)
            log_alpha = new_log_lk - current_log_lk[i]
            # [X] Accept or reject
            if np.log(np.random.rand()) < log_alpha:
                Xs[i] = X2
                current_log_lk[i] = new_log_lk
                accepts_X[t,i] = 1
            else:
                accepts_X[t,i] = 0
            
            # [l] Generate next move
            l2 = ls[i] + prop_l*np.random.randn(p)
            # [l] Compute the acceptance log-probability
            if setting=="gaussian":
                new_log_lk = model.log_lk_partial(Xs[i], l2, As[i], theta)
            elif setting=="binary":
                new_log_lk = model_bin.log_lk_partial(Xs[i], l2, As[i], theta)
            log_alpha = new_log_lk - current_log_lk[i]
            # [l] Accept or reject
            if np.log(np.random.rand()) < log_alpha:
                ls[i] = l2
                current_log_lk[i] = new_log_lk
                accepts_l[t,i] = 1
            else:
                accepts_l[t,i] = 0
            
    return Xs, ls, current_log_lk.sum(), accepts_X.mean(), accepts_l.mean()
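A short sanity check: run the sampler alone for a few sweeps from the spectral initialization (As and p as in the earlier synthetic example):

theta0, Xs0, ls0 = init_saem(As, p)
Xs, ls, log_lk, rate_X, rate_l = mh(As, theta0, n_iter=50, init=(Xs0, ls0),
                                    prop_X=0.01, prop_l=0.5)
print(f"acceptance rates: X={rate_X:.2f}, lambda={rate_l:.2f}")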
Example 11
def mh_mask(A, mask, theta, n_iter, init=None, progress=True, prop_X=0.02):
    """
    Given a set of coefficients of A, runs a MCMC chain to sample from the
    remaining hidden coefficients and the latent variables (X, l).
    - mask is the set of unknown coefficients, given as two arrays of x and y indices
    - prop_X is the initial proposal variance
    The function returns the arrays of values of A, X and l along the MCMC.
    """
    A_init = A.copy()
    F, mu, sigma, sigma_l = theta
    mx, my = mask
    accepts_X = np.zeros(n_iter)
    n, p = F.shape
    batch = 50
    optimal_rate = 0.234
    if init is None:
        X = st.proj_V(F)
        l = mu.copy()
    else:
        A, X, l = init
    
    # Posterior standard deviation for lambda:
    posterior_std_l = np.sqrt(1/(1/sigma**2 + 1/sigma_l**2))
    sv_F = np.array([norm(F[:,i]) for i in range(p)])
    lks = np.zeros(n_iter)
    A_mh = np.zeros((n_iter, n, n))
    X_mh = np.zeros((n_iter, n, p))
    l_mh = np.zeros((n_iter, p))
    
    it = range(n_iter)
    for t in it:
        lks[t] = model.log_lk_partial(X, l, A, theta)
        # Sample on A
        A2 = A_init.copy()
        comp = st.comp_numba_single(X, l)
        for i in range(len(mx)):
            eps = sigma*np.sqrt(2)*np.random.randn()
            A2[mx[i], my[i]] = comp[mx[i],my[i]] + eps
        A = (A2+A2.T)/2
        
        # [X] Generate next move
        D = prop_X*np.random.randn(n,p)/sv_F
        X2 = st.proj_V(X + D)
        # [X] Compute the acceptance log-probability
        current_log_lk = model.log_lk_partial(X, l, A, theta)
        new_log_lk = model.log_lk_partial(X2, l, A, theta)
        log_alpha = (new_log_lk - current_log_lk) * 100
        # [X] Accept or reject
        if np.log(np.random.rand()) < log_alpha:
            X = X2
            current_log_lk = new_log_lk
            accepts_X[t] = 1
        else:
            accepts_X[t] = 0

        # Sample on lambda
        v = np.diag(X.T@A@X)
        posterior_mean = (posterior_std_l**2)*(v/sigma**2 + mu/sigma_l**2)
        l = posterior_mean
        
        A_mh[t] = A
        X_mh[t] = X
        l_mh[t] = l
        
        # Adaptively tune the acceptance rate
        if t%batch==0 and t>1:
            rate_X = accepts_X[max(0, t-batch):t+1].mean()
            adaptive_X = 2*(rate_X > optimal_rate)-1
            prop_X = np.exp(np.log(prop_X) + 0.5*adaptive_X/np.sqrt(1+t))
        
    return A_mh, X_mh, l_mh, lks
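A sketch of posterior imputation with this sampler, reusing the mask built in the map_mask example; A and theta are hypothetical (an observed network and a fitted parameter tuple):

# Run the chain, then average the second half of the A samples to impute the
# hidden coefficients.
A_mh, X_mh, l_mh, lks = mh_mask(A, mask, theta, n_iter=1000)
A_hat = A_mh[500:].mean(axis=0)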
Example 12
def mh_cluster(As, theta, n_iter, init=None, prop_X=0.01, prop_l=0.5, T=1, setting="gaussian"):
    """
    Metropolis within Gibbs sampler for the mixture model.
    - setting can be set to "binary" to handle binary networks
    - prop_X and prop_l are the proposal variances for X and l
    - T is the level of tempering for the z variable (cluster labels)
    The function returns the final values of X, l and z, as well as the running likelihood
    and the chain acceptance rates.
    """
    F, mu, sigma, sigma_l, pi = theta
    K = len(pi)
    n, p = F.shape[1:]
    n_samples = As.shape[0]
    accepts_X = np.zeros((n_iter, n_samples))
    accepts_l = np.zeros((n_iter, n_samples))
    vmf_constants = np.array([spa.log_vmf(F[k]) for k in range(K)])
    
    if init is None:
        mode = [st.proj_V(F[k]) for k in range(K)]
        zs = np.array([np.random.randint(K) for _ in range(n_samples)]).astype(np.int32)
        Xs = np.zeros((n_samples, n, p))
        ls = sigma_l*np.random.randn(n_samples, p)
        for i in range(n_samples):
            Xs[i] = mode[zs[i]]
            ls[i] += mu[zs[i]]
    else:
        Xs, ls, zs = init
        Xs = Xs.copy()
        ls = ls.copy()
        zs = zs.copy()
    
    if setting=="gaussian":
        current_log_lk = np.array([model_cluster.log_lk_partial(Xs[i], ls[i], zs[i], As[i], theta) for i in range(n_samples)])
    elif setting=="binary":
        current_log_lk = np.array([model_bin_cluster.log_lk_partial(Xs[i], ls[i], zs[i], As[i], theta) for i in range(n_samples)])
    
    for t in range(n_iter):
        for i in range(n_samples):
            # [z] Explicit sampling on z
            if setting=="gaussian":
                log_probs = (1/T) * model_cluster.log_lk_partial_z(Xs[i], ls[i], As[i], theta, constants=vmf_constants)
            elif setting=="binary":
                log_probs = (1/T) * model_bin_cluster.log_lk_partial_z(Xs[i], ls[i], As[i], theta, constants=vmf_constants)
            s = logsumexp(log_probs)
            probs = np.exp(log_probs - s)

            # Sample z manually from its cdf (np.random.choice is not available in numba)
            cumulative_distribution = np.cumsum(probs)
            cumulative_distribution /= cumulative_distribution[-1]
            u = np.random.rand()
            zs[i] = np.searchsorted(cumulative_distribution, u, side="right")
            
            # [X] Generate next move
            D = prop_X*np.random.randn(n,p)
            X2 = st.proj_V(Xs[i] + D)
            # [X] Compute the acceptance log-probability
            if setting=="gaussian":
                new_log_lk = model_cluster.log_lk_partial(X2, ls[i], zs[i], As[i], theta)
            elif setting=="binary":
                new_log_lk = model_bin_cluster.log_lk_partial(X2, ls[i], zs[i], As[i], theta)
            log_alpha = new_log_lk - current_log_lk[i]
            # [X] Accept or reject
            if np.log(np.random.rand()) < log_alpha:
                Xs[i] = X2
                current_log_lk[i] = new_log_lk
                accepts_X[t,i] = 1
            else:
                accepts_X[t,i] = 0
            
            # [l] Generate next move
            l2 = ls[i] + prop_l*np.random.randn(p)
            # [l] Compute the acceptance log-probability
            if setting=="gaussian":
                new_log_lk = model_cluster.log_lk_partial(Xs[i], l2, zs[i], As[i], theta)
            elif setting=="binary":
                new_log_lk = model_bin_cluster.log_lk_partial(Xs[i], l2, zs[i], As[i], theta)
            log_alpha = new_log_lk - current_log_lk[i]
            # [l] Accept or reject
            if np.log(np.random.rand()) < log_alpha:
                ls[i] = l2
                current_log_lk[i] = new_log_lk
                accepts_l[t,i] = 1
            else:
                accepts_l[t,i] = 0
            
    return Xs, ls, zs, current_log_lk.sum(), accepts_X.mean(), accepts_l.mean()
Example 13
def mala(As, theta, n_iter, init=None, progress=True,
         prop_X=0.01, prop_l=0.5, setting="gaussian"):
    """
    Metropolis Adjusted Langevin Algorithm sampler for the base model.
    - setting can be set to "binary" to handle binary networks
    - prop_X and prop_l are the proposal variances for X and l
    The function returns the final values of X and l, as well as the running likelihood
    and the chain acceptance rates.
    """
    F, mu, sigma, sigma_l = theta
    n_samples = As.shape[0]
    accepts_X = np.zeros((n_iter, n_samples))
    accepts_l = np.zeros((n_iter, n_samples))
    n, p = F.shape[-2:]
    if init is None:
        mode = st.proj_V(F)
        Xs = np.array([mode.copy() for _ in range(n_samples)])
        ls = mu[None,:] + sigma_l*np.random.randn(n_samples, p)
    else:
        Xs, ls = init
        Xs = Xs.copy()
        ls = ls.copy()
    
    if setting=="gaussian":
        current_log_lk = np.array([model.log_lk_partial(Xs[i], ls[i], As[i], theta) for i in range(n_samples)])
    elif setting=="binary":
        current_log_lk = np.array([model_bin.log_lk_partial(Xs[i], ls[i], As[i], theta) for i in range(n_samples)])
    
    step_X = 0.5*prop_X**2
    step_l = 0.5*prop_l**2
    if setting=="gaussian":
        current_grad_X  = np.array([model.log_lk_partial_grad_X(Xs[i], ls[i], As[i], theta) for i in range(n_samples)])
    elif setting=="binary":
        current_grad_X  = np.array([model_bin.log_lk_partial_grad_X(Xs[i], ls[i], As[i], theta) for i in range(n_samples)])
    current_grad_X = np.array([g/norm(g) for g in current_grad_X])
    current_drift_X = np.array([st.proj_V(Xs[i] + step_X*current_grad_X[i]) for i in range(n_samples)])
    if setting=="gaussian":
        current_grad_lambda  = np.array([model.log_lk_partial_grad_lambda(Xs[i], ls[i], As[i], theta) for i in range(n_samples)])
    elif setting=="binary":
        current_grad_lambda  = np.array([model_bin.log_lk_partial_grad_lambda(Xs[i], ls[i], As[i], theta) for i in range(n_samples)])
    current_grad_lambda = np.array([g/norm(g) for g in current_grad_lambda])
    
    it = trange(n_iter) if progress else range(n_iter)
    for t in it:
        for i in range(n_samples):
            # [X] Generate next move
            D = prop_X*np.random.randn(n,p)
            grad_X = current_grad_X[i]
            drift_X = current_drift_X[i]
            D += step_X * grad_X
            X2 = st.proj_V(Xs[i] + D)
            if setting=="gaussian":
                grad_X2 = model.log_lk_partial_grad_X(X2, ls[i], As[i], theta)
            elif setting=="binary":
                grad_X2 = model_bin.log_lk_partial_grad_X(X2, ls[i], As[i], theta)
            grad_X2 = grad_X2/norm(grad_X2)
            drift_X2 = st.proj_V(X2 + step_X*grad_X2)
            mala_jump = (-st.discr(Xs[i], drift_X2) + st.discr(X2, drift_X)) / (2*prop_X**2)
            # [X] Compute the acceptance log-probability
            if setting=="gaussian":
                new_log_lk = model.log_lk_partial(X2, ls[i], As[i], theta)
            elif setting=="binary":
                new_log_lk = model_bin.log_lk_partial(X2, ls[i], As[i], theta)
            log_alpha = new_log_lk - current_log_lk[i] + mala_jump
            # [X] Accept or reject
            if np.log(np.random.rand()) < log_alpha:
                Xs[i] = X2
                current_log_lk[i] = new_log_lk
                accepts_X[t,i] = 1
                current_grad_X[i] = grad_X2
                current_drift_X[i] = drift_X2
                if setting=="gaussian":
                    g = model.log_lk_partial_grad_lambda(Xs[i], ls[i], As[i], theta)
                elif setting=="binary":
                    g = model_bin.log_lk_partial_grad_lambda(Xs[i], ls[i], As[i], theta)
                current_grad_lambda[i] = g/norm(g)
            else:
                accepts_X[t,i] = 0
            
            # [l] Generate next move
            l2 = ls[i] + prop_l*np.random.randn(p)
            grad_l = current_grad_lambda[i]
            l2 += step_l * grad_l
            if setting=="gaussian":
                grad_l2 = model.log_lk_partial_grad_lambda(Xs[i], l2, As[i], theta)
            elif setting=="binary":
                grad_l2 = model_bin.log_lk_partial_grad_lambda(Xs[i], l2, As[i], theta)
            grad_l2 = grad_l2/norm(grad_l2)
            mala_jump = (-norm(ls[i]-l2-step_l*grad_l2)**2 + norm(l2-ls[i]-step_l*grad_l)**2) / (2*prop_l**2)
            # [l] Compute the acceptance log-probability
            if setting=="gaussian":
                new_log_lk = model.log_lk_partial(Xs[i], l2, As[i], theta)
            elif setting=="binary":
                new_log_lk = model_bin.log_lk_partial(Xs[i], l2, As[i], theta)
            log_alpha = new_log_lk - current_log_lk[i] + mala_jump
            # [l] Accept or reject
            if np.log(np.random.rand()) < log_alpha:
                ls[i] = l2
                current_log_lk[i] = new_log_lk
                accepts_l[t,i] = 1
                current_grad_lambda[i] = grad_l2
                if setting=="gaussian":
                    g = model.log_lk_partial_grad_X(Xs[i], ls[i], As[i], theta)
                elif setting=="binary":
                    g = model_bin.log_lk_partial_grad_X(Xs[i], ls[i], As[i], theta)
                current_grad_X[i] = g/norm(g)
                current_drift_X[i] = st.proj_V(Xs[i] + step_X*current_grad_X[i])
            else:
                accepts_l[t,i] = 0
            
            
        if progress: it.set_postfix({"log_lk": current_log_lk.sum()})
    if progress: print("Acceptance rates", accepts_X.mean(), accepts_l.mean())
        
    return Xs, ls, current_log_lk.sum(), accepts_X.mean(), accepts_l.mean()
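The MALA sampler is called exactly like mh and can serve as a drop-in replacement when the gradients of the partial log-likelihood are available; a sketch with the same inputs as the mh example:

Xs, ls, log_lk, rate_X, rate_l = mala(As, theta0, n_iter=50, init=(Xs0, ls0),
                                      prop_X=0.01, prop_l=0.5, progress=True)
print(f"acceptance rates: X={rate_X:.2f}, lambda={rate_l:.2f}")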