Code example #1
def map_to_ground_truth(Xs_mh, ls_mh, theta, theta0):
    # Align the estimated parameters and latent variables with the ground
    # truth theta0, up to a column permutation and sign flips
    F, mu, sigma, sigma_l = theta
    F0, mu0, sigma0, sigma_l0 = theta0
    # Best permutation m and signs s matching the estimated mode to the true one
    m, s = st.greedy_permutation(st.proj_V(F0), st.proj_V(F))
    F = s * F[:, m]
    mu = mu[m]
    Xs_mh = s * Xs_mh[:, :, m]
    ls_mh = ls_mh[:, m]
    return Xs_mh, ls_mh, (F, mu, sigma, sigma_l), m, s
Code example #2
def init_saem_grad(As, p, n_iter=10, step=0.1, setting="gaussian"):
    # Select the binary observation model if requested; note that this rebinds
    # the module-level `model` used below
    global model
    if setting == "binary":
        model = model_bin
    n_samples, n, _ = As.shape
    theta, _, _ = init_saem(As, p)
    F, mu, sigma, sigma_l = theta
    sigma = 1
    sigma_l = 1
    mode = st.proj_V(F)
    Xs = np.array([mode.copy() for _ in range(n_samples)])
    ls = mu[None, :] + sigma_l * np.random.randn(n_samples, p)
    lks = []
    it = trange(n_iter)
    prop_l = 1
    current_log_lk = np.array([
        model.log_lk_partial(Xs[i], ls[i], As[i], theta)
        for i in range(n_samples)
    ])
    for t in it:
        mode = st.proj_V(F)
        posterior_std_l = 1 / (1 / sigma**2 + 1 / sigma_l**2)
        for _ in range(10):
            for i in range(n_samples):
                if t % 5 == 0:
                    m, s = st.greedy_permutation(mode, Xs[i])
                    Xs[i] = s * Xs[i][:, m]
                    ls[i] = ls[i][m]

                grad_X = model.log_lk_partial_grad_X(Xs[i], ls[i], As[i],
                                                     theta)
                grad_X = grad_X / norm(grad_X)
                Xs[i] = st.proj_V(Xs[i] + step * grad_X)

                # [l] Generate next move
                l2 = ls[i] + prop_l * np.random.randn(p)
                # [l] Compute the acceptance log-probability
                new_log_lk = model.log_lk_partial(Xs[i], l2, As[i], theta)
                log_alpha = new_log_lk - current_log_lk[i]
                # [l] Accept or reject
                if np.log(np.random.rand()) < log_alpha:
                    ls[i] = l2
                    current_log_lk[i] = new_log_lk

        F = vmf.mle(Xs.mean(axis=0))
        mu = ls.mean(axis=0)
        # Standard deviations (not variances), consistent with init_saem
        sigma = np.sqrt(((As - st.comp_numba_many(Xs, ls))**2).mean())
        sigma_l = np.sqrt(((ls - mu)**2).mean())
        theta = (F, mu, sigma, sigma_l)
        lks.append(model.log_lk(Xs, ls, As, theta, normalized=True))
        it.set_postfix({"lk": lks[-1]})
    return theta, Xs, ls, lks
Code example #3
def map_to_ground_truth_cluster(Xs_mh, ls_mh, zs_mh, theta, theta0, M=None):
    F, mu, sigma, sigma_l, pi = theta
    F0, mu0, sigma0, sigma_l0, pi0 = theta0
    n_samples, n, p = Xs_mh.shape
    K = len(pi)
    # Cost matrix E[k, l]: discrepancy between ground-truth cluster k and
    # estimated cluster l after the best permutation/sign alignment
    ms = np.zeros((K, K, p), dtype=np.int32)
    ss = np.zeros((K, K, p))
    E = np.zeros((K, K))
    for k in range(K):
        for l in range(K):
            X = st.proj_V(F0[k])
            Y = st.proj_V(F[l])
            ms[k, l], ss[k, l] = st.greedy_permutation(X, Y)
            E[k, l] += st.discr(F0[k], ss[k, l] * F[l][:, ms[k, l]])
            E[k, l] += np.linalg.norm(mu0[k] - mu[l][ms[k, l]])

    if M is None:
        # Greedily match each ground-truth cluster to its closest estimated cluster
        M = np.zeros(K, dtype=np.int32)
        iM = np.zeros(K, dtype=np.int32)
        for _ in range(K):
            k, l = np.unravel_index(E.argmin(), E.shape)
            # Exclude the matched row and column from further assignments
            E[k, :] = np.inf
            E[:, l] = np.inf
            M[k] = l
            iM[l] = k
    else:
        iM = np.zeros(K, dtype=np.int32)
        for k, l in enumerate(M):
            iM[l] = k

    pi = pi[M]
    ms = [ms[k, M[k]] for k in range(K)]
    ss = [ss[k, M[k]] for k in range(K)]

    F_ = F.copy()
    mu_ = mu.copy()
    for k in range(K):
        m = ms[k]
        s = ss[k]
        F[k] = s * F_[M[k]][:, m]
        mu[k] = mu_[M[k]][m]

    zs_mh = np.array([iM[zs_mh[i]] for i in range(n_samples)])
    for i in range(n_samples):
        m = ms[zs_mh[i]]
        s = ss[zs_mh[i]]
        Xs_mh[i] = s * Xs_mh[i][:, m]
        ls_mh[i] = ls_mh[i][m]
    return Xs_mh, ls_mh, zs_mh, (F, mu, sigma, sigma_l, pi), ms, ss
Code example #4
def init_saem(As, p):
    n_samples, n, _ = As.shape
    ls = np.zeros((n_samples, p))
    Xs = np.zeros((n_samples, n, p))
    # Compute the eigendecomposition of each adjacency matrix
    for i in range(n_samples):
        ev, u = np.linalg.eig(As[i])
        idx = (-np.abs(ev)).argsort()[:p]
        ls[i] = ev[idx]
        Xs[i] = u[:, idx]
    # Average the eigenvectors and project the mean onto the Stiefel manifold
    mode = st.proj_V(Xs.mean(axis=0))
    # Permute the eigenvectors to align them with the computed mode
    for i in range(n_samples):
        m, s = st.greedy_permutation(mode, Xs[i])
        Xs[i] = s * Xs[i][:, m]
        ls[i] = ls[i][m]
    # Initialize the parameters from the resulting eigenvectors and eigenvalues
    F = vmf.mle(Xs.mean(axis=0))
    mu = ls.mean(axis=0)
    sigma = (As - st.comp_numba_many(Xs, ls)).std()
    sigma_l = (ls - mu).std()
    return (F, mu, sigma, sigma_l), Xs, ls
Code example #5
def mcmc_saem(As,
              Xs_mh,
              ls_mh,
              theta,
              n_iter=100,
              mala=False,
              prop_X=0.01,
              prop_l=0.5,
              n_mcmc=20,
              history=True,
              setting="gaussian"):
    F, mu, sigma, sigma_l = theta

    optimal_rate = 0.234
    batch = 5  # SAEM steps per column permutation step
    # n_mcmc (MCMC steps per SAEM step) is taken from the function argument
    n_samples, n, p = Xs_mh.shape

    # Initialize the exhaustive statistics
    Xs_comp = st.comp(Xs_mh, ls_mh)
    X_bar = Xs_mh.mean(axis=0)
    l_bar = ls_mh.mean(axis=0)
    l2_bar = (ls_mh**2).mean(axis=0).sum()
    s2_bar = ((As - Xs_comp)**2).mean()

    # Initialize the latent variables
    Xs_mhs = [Xs_mh]
    ls_mhs = [ls_mh]
    Xs_mh = Xs_mh.copy()
    ls_mh = ls_mh.copy()
    lks = []

    # Initialize the parameter history
    Fs = [F]
    mus = [mu]
    sigmas = [sigma]
    sigma_ls = [sigma_l]

    for n in trange(n_iter):
        # MCMC step: use Metropolis-Hastings or MALA
        if mala:
            Xs_mh, ls_mh, lk, rate_X, rate_l = mcmc.mala(As,
                                                         theta,
                                                         n_iter=n_mcmc,
                                                         init=(Xs_mh, ls_mh),
                                                         progress=False,
                                                         prop_X=prop_X,
                                                         prop_l=prop_l,
                                                         setting=setting)
        else:
            Xs_mh, ls_mh, lk, rate_X, rate_l = mcmc.mh(As,
                                                       theta,
                                                       n_iter=n_mcmc,
                                                       init=(Xs_mh, ls_mh),
                                                       prop_X=prop_X,
                                                       prop_l=prop_l,
                                                       setting=setting)

        if n % batch == 0 and n < n_iter / 3:
            mode = st.proj_V(F)
            for i in range(n_samples):
                perm, sign = st.greedy_permutation(mode, Xs_mh[i])
                Xs_mh[i] = sign * Xs_mh[i][:, perm]
                ls_mh[i] = ls_mh[i][perm]

        # Update proposal variance for adaptive MCMC
        adaptive_X = 2 * (rate_X > optimal_rate) - 1
        prop_X = np.exp(np.log(prop_X) + 0.5 * adaptive_X / (1 + n)**0.6)
        adaptive_l = 2 * (rate_l > optimal_rate) - 1
        prop_l = np.exp(np.log(prop_l) + 0.5 * adaptive_l / (1 + n)**0.6)

        # Maximization step

        # Update the stochastic approximation coefficient
        if n < n_iter / 2:
            alpha = 1
        else:
            alpha = 1 / (n - n_iter / 2 + 1)**0.6

        # Update the exhaustive statistics
        Xs_comp = st.comp(Xs_mh, ls_mh)
        X_bar = (1 - alpha) * X_bar + alpha * Xs_mh.mean(axis=0)
        l_bar = (1 - alpha) * l_bar + alpha * ls_mh.mean(axis=0)
        l2_bar = (1 - alpha) * l2_bar + alpha * (ls_mh**2).mean(axis=0).sum()
        s2_bar = (1 - alpha) * s2_bar + alpha * ((As - Xs_comp)**2).mean()

        # Update sigma
        sigma = np.sqrt(s2_bar)
        # Update F
        F = vmf.mle(X_bar, orth=True)
        # Update mu
        mu = l_bar
        # Update sigma_l
        sigma_l = np.sqrt((norm(mu)**2 + l2_bar - 2 * (mu * l_bar).sum()) / p)

        theta = (F, mu, sigma, sigma_l)

        # Store the current complete log-likelihood
        if setting == "gaussian":
            lks.append(model.log_lk(Xs_mh, ls_mh, As, theta, normalized=True))
        elif setting == "binary":
            lks.append(
                model_bin.log_lk(Xs_mh, ls_mh, As, theta, normalized=True))

        Fs.append(F)
        mus.append(mu)
        sigmas.append(sigma)
        sigma_ls.append(sigma_l)

        # if history is True, store the values of Xs and ls along the Markov chain
        if history:
            Xs_mhs.append(Xs_mh.copy())
            ls_mhs.append(ls_mh.copy())

    result = {
        "theta": theta,
        "Xs_mh": Xs_mh,
        "ls_mh": ls_mh,
        "history": {
            "lks": lks,
            "F": Fs,
            "mu": mus,
            "sigma": sigmas,
            "sigma_l": sigma_ls,
            "Xs_mh": Xs_mhs,
            "ls_mh": ls_mhs
        }
    }

    return result
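The spectral initialization and the MCMC-SAEM loop above are meant to be chained: init_saem builds a starting point from the per-graph eigendecompositions, and mcmc_saem refines it. A minimal usage sketch follows, assuming As is an array of symmetric adjacency matrices of shape (n_samples, n, n) and that the module-level dependencies (st, vmf, mcmc, model) are in scope; the data-loading line and the value of p are hypothetical placeholders.

import numpy as np

p = 3  # hypothetical latent dimension
# As = np.load("adjacency_matrices.npy")  # hypothetical data source

# Spectral initialization of the parameters and latent variables
theta0, Xs0, ls0 = init_saem(As, p)

# Refine with the MCMC-SAEM loop (Metropolis-Hastings proposals)
result = mcmc_saem(As, Xs0, ls0, theta0, n_iter=200, n_mcmc=20)

F, mu, sigma, sigma_l = result["theta"]
print("final complete log-likelihood:", result["history"]["lks"][-1])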
Code example #6
def init_saem_grad_cluster(As, p, K, n_iter=10, step=0.1, setting="gaussian"):
    n_samples, n, _ = As.shape
    kmeans = KMeans(n_clusters=K).fit(As.reshape(n_samples, -1))
    zs = kmeans.labels_

    F = np.zeros((K, n, p))
    mu = np.zeros((K, p))
    sigma = np.zeros(K)
    sigma_l = np.zeros(K)
    pi = np.bincount(zs) / n_samples
    for k in range(K):
        idx = np.where(zs == k)[0]
        (F[k], mu[k], sigma[k], sigma_l[k]), _, _ = init_saem(As[idx], p)

    mode = [st.proj_V(F[k]) for k in range(K)]
    Xs = np.array([mode[zs[i]].copy() for i in range(n_samples)])
    ls = mu[zs]

    lks = []
    prop_l = 1
    it = trange(n_iter)
    # Per-sample log-likelihood under the sample's own cluster parameters,
    # using the observation model matching the setting
    lk_model = model_bin if setting == "binary" else model
    current_log_lk = np.array([
        lk_model.log_lk_partial(
            Xs[i], ls[i], As[i],
            (F[zs[i]], mu[zs[i]], sigma[zs[i]], sigma_l[zs[i]]))
        for i in range(n_samples)
    ])
    for t in it:
        mode = [st.proj_V(F[k]) for k in range(K)]
        posterior_std_l = 1 / (1 / sigma**2 + 1 / sigma_l**2)
        for _ in range(10):
            for i in range(n_samples):
                # Parameters of the cluster of sample i
                k = zs[i]
                theta = (F[k], mu[k], sigma[k], sigma_l[k])

                if t % 5 == 0:
                    # Re-align sample i with the mode of its own cluster
                    m, s = st.greedy_permutation(mode[k], Xs[i])
                    Xs[i] = s * Xs[i][:, m]
                    ls[i] = ls[i][m]

                if setting == "gaussian":
                    grad_X = model.log_lk_partial_grad_X(
                        Xs[i], ls[i], As[i], theta)
                elif setting == "binary":
                    grad_X = model_bin.log_lk_partial_grad_X(
                        Xs[i], ls[i], As[i], theta)
                grad_X = grad_X / norm(grad_X)
                Xs[i] = st.proj_V(Xs[i] + step * grad_X)

                # [l] Generate next move
                l2 = ls[i] + prop_l * np.random.randn(p)
                # [l] Compute the acceptance log-probability
                if setting == "gaussian":
                    new_log_lk = model.log_lk_partial(Xs[i], l2, As[i], theta)
                elif setting == "binary":
                    new_log_lk = model_bin.log_lk_partial(
                        Xs[i], l2, As[i], theta)
                log_alpha = new_log_lk - current_log_lk[i]
                # [l] Accept or reject
                if np.log(np.random.rand()) < log_alpha:
                    ls[i] = l2
                    current_log_lk[i] = new_log_lk

        for k in range(K):
            idx = np.where(zs == k)[0]
            F[k] = vmf.mle(Xs[idx].mean(axis=0))
            mu[k] = ls[idx].mean(axis=0)
            # Standard deviations (not variances), consistent with init_saem
            sigma[k] = np.sqrt(((As[idx] -
                                 st.comp_numba_many(Xs[idx], ls[idx]))**2).mean())
            sigma_l[k] = np.sqrt(((ls[idx] - mu[k])**2).mean())

        if setting == "gaussian":
            lks.append(
                model_cluster.log_lk(Xs,
                                     ls,
                                     zs,
                                     As, (F, mu, sigma, sigma_l, pi),
                                     normalized=True))
        elif setting == "binary":
            lks.append(
                model_cluster.log_lk(Xs,
                                     ls,
                                     zs,
                                     As, (F, mu, sigma, sigma_l, pi),
                                     normalized=True))

        it.set_postfix({"lk": lks[-1]})
    return (F, mu, sigma, sigma_l, pi), Xs, ls, zs, lks
Code example #7
def mcmc_saem_cluster(As,
                      Xs_mh,
                      ls_mh,
                      zs_mh,
                      theta,
                      n_iter=100,
                      prop_X=0.01,
                      prop_l=0.5,
                      n_mcmc=20,
                      history=True,
                      setting="gaussian",
                      T=0):
    F, mu, sigma, sigma_l, pi = theta

    optimal_rate = 0.234
    batch = 5
    n_samples, n, p = Xs_mh.shape
    K = len(pi)

    # Initialize the exhaustive statistics for each cluster
    Xs_comp = st.comp(Xs_mh, ls_mh)
    X_bar = np.zeros((K, n, p))
    l_bar = np.zeros((K, p))
    l2_bar = np.zeros(K)
    s2_bar = np.zeros(K)
    for k in range(K):
        idx = np.where(zs_mh == k)[0]
        # Check that the cluster is not empty
        if len(idx) > 0:
            X_bar[k] = Xs_mh[idx].mean(axis=0)
            l_bar[k] = ls_mh[idx].mean(axis=0)
            l2_bar[k] = (ls_mh[idx]**2).mean(axis=0).sum()
            s2_bar[k] = ((As[idx] - Xs_comp[idx])**2).mean()
        else:
            X_bar[k] = Xs_mh.mean(axis=0)
            l_bar[k] = ls_mh.mean(axis=0)
            l2_bar[k] = (ls_mh**2).mean(axis=0).sum()
            s2_bar[k] = ((As - Xs_comp)**2).mean()

    # Initialize the latent variables
    Xs_mh = Xs_mh.copy()
    ls_mh = ls_mh.copy()
    zs_mh = zs_mh.copy().astype(np.int32)
    Xs_mhs = [Xs_mh]
    ls_mhs = [ls_mh]
    zs_mhs = [zs_mh]
    lks = []

    # Initialize the parameter history
    Fs = [F]
    mus = [mu]
    sigmas = [sigma]
    sigma_ls = [sigma_l]
    pis = [pi]

    for n in trange(n_iter):
        # MCMC step
        temp = 1 + T / (n + 1)**0.6
        Xs_mh, ls_mh, zs_mh, _, rate_X, rate_l = mcmc.mh_cluster(
            As,
            theta,
            n_iter=n_mcmc,
            init=(Xs_mh, ls_mh, zs_mh),
            prop_X=prop_X,
            prop_l=prop_l,
            setting=setting,
            T=temp)

        if n % batch == 0:
            mode = [st.proj_V(F[k]) for k in range(K)]
            mu_old = mu.copy()
            # Align the F parameters of each cluster to the first cluster.
            for k in range(1, K):
                perm, sign = st.greedy_permutation(mode[0], mode[k])
                F[k] = sign * F[k][:, perm]
                mu[k] = mu[k][perm]
            # Permute the X columns to best match the F parameter of their cluster
            if n < n_iter // 3 or norm(mu_old - mu) > 0:
                for i in range(n_samples):
                    perm, sign = st.greedy_permutation(mode[zs_mh[i]],
                                                       Xs_mh[i])
                    Xs_mh[i] = sign * Xs_mh[i][:, perm]
                    ls_mh[i] = ls_mh[i][perm]

        # Update proposal variance for adaptive MCMC
        adaptive_X = 2 * (rate_X > optimal_rate) - 1
        prop_X = np.exp(np.log(prop_X) + 0.5 * adaptive_X / (1 + n)**0.6)
        adaptive_l = 2 * (rate_l > optimal_rate) - 1
        prop_l = np.exp(np.log(prop_l) + 0.5 * adaptive_l / (1 + n)**0.6)

        # Maximization step

        # Update the stochastic approximation coefficient
        if n < n_iter / 2:
            alpha = 1
        else:
            alpha = 1 / (n - n_iter / 2 + 1)**0.6

        # Update the exhaustive statistics
        Xs_comp = st.comp(Xs_mh, ls_mh)
        for k in range(K):
            idx = np.where(zs_mh == k)[0]
            if len(idx) > 0:
                X_bar_new = Xs_mh[idx].mean(axis=0)
                l_bar_new = ls_mh[idx].mean(axis=0)
                l2_bar_new = (ls_mh[idx]**2).mean(axis=0).sum()
                s2_bar_new = ((As[idx] - Xs_comp[idx])**2).mean()

                X_bar[k] = (1 - alpha) * X_bar[k] + alpha * X_bar_new
                l_bar[k] = (1 - alpha) * l_bar[k] + alpha * l_bar_new
                l2_bar[k] = (1 - alpha) * l2_bar[k] + alpha * l2_bar_new
                s2_bar[k] = (1 - alpha) * s2_bar[k] + alpha * s2_bar_new

        # Update the parameters for each cluster
        for k in range(K):
            sigma[k] = np.sqrt(s2_bar[k])
            F[k] = vmf.mle(X_bar[k], orth=True)
            mu[k] = l_bar[k]
            sigma_l[k] = np.sqrt((norm(mu[k])**2 + l2_bar[k] - 2 *
                                  (mu[k] * l_bar[k]).sum()) / p)
        pi = pi.copy()
        for k in range(K):
            pi[k] = (zs_mh == k).mean()

        theta = (F, mu, sigma, sigma_l, pi)

        # Store the current complete log-likelihood
        if setting == "gaussian":
            lks.append(
                model_cluster.log_lk(Xs_mh,
                                     ls_mh,
                                     zs_mh,
                                     As,
                                     theta,
                                     normalized=True))
        elif setting == "binary":
            lks.append(
                model_bin_cluster.log_lk(Xs_mh,
                                         ls_mh,
                                         zs_mh,
                                         As,
                                         theta,
                                         normalized=True))

        Fs.append(F)
        mus.append(mu)
        sigmas.append(sigma)
        sigma_ls.append(sigma_l)
        pis.append(pi)

        # if history is True, store the values of Xs and ls along the Markov chain
        if history:
            Xs_mhs.append(Xs_mh.copy())
            ls_mhs.append(ls_mh.copy())
            zs_mhs.append(zs_mh.copy())

    result = {
        "theta": theta,
        "Xs_mh": Xs_mh,
        "ls_mh": ls_mh,
        "zs_mh": zs_mh,
        "history": {
            "lks": lks,
            "F": Fs,
            "mu": mus,
            "sigma": sigmas,
            "sigma_l": sigma_ls,
            "pi": pis,
            "Xs_mh": Xs_mhs,
            "ls_mh": ls_mhs,
            "zs_mh": zs_mhs
        }
    }

    return result
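For the mixture model, the same pattern applies with the cluster variants: init_saem_grad_cluster provides initial parameters, latent variables and cluster labels, and mcmc_saem_cluster runs the tempered MCMC-SAEM loop. A minimal sketch, under the same assumptions on As as above; p, K and the tempering constant T are hypothetical choices.

import numpy as np

p, K = 3, 2  # hypothetical latent dimension and number of clusters
# As = np.load("adjacency_matrices.npy")  # hypothetical data source

# Gradient-based initialization of the cluster parameters, latent variables and labels
theta0, Xs0, ls0, zs0, _ = init_saem_grad_cluster(As, p, K, n_iter=10)

# Tempered MCMC-SAEM on the mixture model
result = mcmc_saem_cluster(As, Xs0, ls0, zs0, theta0, n_iter=200, T=1)

F, mu, sigma, sigma_l, pi = result["theta"]
print("cluster proportions:", pi)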