# Example no. 1
# 0
def gibbs(y,
          N=None,K=None,alpha=1,
          iters=500,burnin=0,skip=0,
          truth=None):
    """
    Gibbs sampling for a stochastic block model.

    N = |nodes|
    K = |classes|

    Model
        pi     ~ Dirichlet(alpha)             pi : [0,1]^K
        W[k,l] ~ Beta(1,1)                    W  : [0,1]^(K^2)
        z[i]   ~ Categorical(pi)              z  : class label per node
        y[i,j] ~ Bernoulli(W[z[i],z[j]])      for i != j

    Class labels are used directly as 0-based indices into pi and W.

    Notation: "x" means "curr x", "x_" means "next x".  Within one sweep
    pi_, W_ and z_ are all drawn conditioned on the *current* pi, W, z and
    only then swapped in (a synchronous update).

    Args:
        y: adjacency data indexed as y[i, j].  1 = edge, 0 = non-edge,
            NaN = unobserved.  The diagonal is never read.
        N: number of nodes (required, asserted truthy).
        K: number of classes (required, asserted truthy).
        alpha: concentration of the symmetric Dirichlet prior on pi
            (a scalar, or anything that broadcasts against a length-K
            vector).
        iters: number of sweeps.
        burnin, skip: only range-checked; no samples are discarded or
            thinned by this function.
        truth: reference labelling handed to rand_index (required).

    Returns:
        (pis, zs, Ws, probs, rands).  pis, zs and Ws are lists holding the
        initial draw followed by one sample per sweep (iters + 1 entries);
        probs[it] is Ber + Cat + Dir for the state after sweep `it`, with
        Dir evaluated under the prior alpha; rands[it] is
        rand_index(truth, z) for that state.
    """
    # NaN never compares equal (or unequal-false) to anything, so
    # "y[i,j] != nan" is always True; isnan is the only reliable test.
    from numpy import isnan

    print('--- Gibbs Sampling ---')
    assert N and K
    assert 0 <= burnin < iters and 0 <= skip < iters
    assert truth is not None
    probs, rands = zeros(iters), zeros(iters)
    true_z = truth

    # observed[i][j]: off-diagonal pair whose y value is not NaN.
    # "and" short-circuits so y[i,i] is never touched.
    observed = [[a != b and not isnan(y[a,b]) for b in range(N)]
                for a in range(N)]
    pairs = [(a,b) for a in range(N) for b in range(N) if observed[a][b]]

    # Init: sample from priors
    # dirichlet prior (kept separate from the per-sweep posterior counts)
    alpha0 = alpha * ones(K,dtype=int)
    pi_ = sample.dirichlet(alpha0)
    pi = pi_.copy()

    # beta prior
    W_ = sample.beta(1,1, size=(K,K))
    W = W_.copy()

    # categorical prior
    z_ = nans(N,dtype=int)
    for i in range(N):
        z_[i] = sample.categorical(pi)
    z = z_.copy()

    pis, Ws, zs = [pi],[W],[z]
    # Iter: sample from posteriors
    for it in range(iters):
        if (it+1)%100==0: print('- %d -' % (it+1))

        # pi | z : prior plus the class counts of the CURRENT z only.
        # (Adding into a persistent alpha would condition on every past z.)
        alpha_post = alpha0.copy()
        for k in range(K):
            alpha_post[k] += (z==k).sum()
        pi_ = sample.dirichlet(alpha_post)

        # W | y, z : Beta(1,1) prior plus edge / non-edge counts per class
        # pair, gathered in one pass over the observed pairs.  Values other
        # than 0 and 1 count towards neither.
        h,t = ones((K,K)), ones((K,K))
        for (i,j) in pairs:
            if y[i,j] == 1:
                h[z[i],z[j]] += 1
            elif y[i,j] == 0:
                t[z[i],z[j]] += 1
        W_ = sample.beta(h,t, size=(K,K))

        # z[i] | y, pi, W, z[-i] : prior weight times the likelihood of
        # every observed out-edge (i,j) and in-edge (j,i) of node i.
        for i in range(N):
            zP = array([pi[zi]
                        * prod([W[zi,z[j]] if y[i,j] else 1-W[zi,z[j]]
                                for j in range(N) if observed[i][j]])
                        * prod([W[z[j],zi] if y[j,i] else 1-W[z[j],zi]
                                for j in range(N) if observed[j][i]])
                        for zi in range(K)])
            zP /= zP.sum()
            z_[i] = sample.categorical(zP)

        pi = pi_.copy()
        W  = W_.copy()
        z  = z_.copy()

        pis.append(pi)
        Ws.append(W)
        zs.append(z)

        # compute log-probability; should (non-monotonically) increase
        Ber = 0.0
        for (i,j) in pairs:
            p = W[z[i],z[j]]
            Ber += log(p if y[i,j] else 1-p)
        Cat = 0.0
        for i in range(N):
            Cat += log(pi[z[i]])
        Dir = -log_Beta(alpha0)
        for k in range(K):
            Dir += (alpha0[k]-1) * log(pi[k])
        probs[it] = Ber + Cat + Dir

        # compute rand_index by samples
        rands[it] = rand_index(true_z, z)

    return pis,zs,Ws, probs,rands
# Example no. 2
# 0
# File: mmsb.py  Project: sboosali/PGM
def gibbs(y, N=None,K=None,alpha=1, iters=500,burnin=0,skip=0):
    """
    Gibbs sampling for a mixed-membership stochastic block model.

    N = |nodes|
    K = |classes|
    |edges| = N^2 - N ordered pairs (i,j), i != j

    Model
        pi[i]  ~ Dirichlet(alpha)                  pi : [0,1]^K per node
        W[k,l] ~ Beta(1,1)                         W  : [0,1]^(K^2)
        r[i,j] ~ Categorical(pi[i])                role of i towards j
        s[i,j] ~ Categorical(pi[j])                role of j towards i
        y[i,j] ~ Bernoulli(W[r[i,j], s[i,j]])

    Roles are used directly as 0-based indices into pi[i] and W.

    Notation: "x" means "curr x", "x_" means "next x".  Within one sweep
    every "next" variable is drawn conditioned on the *current* ones and
    only then swapped in.

    Args:
        y: array compared elementwise against the (N, N) role arrays and
            indexed as y[i, j].  1 = edge, 0 = non-edge, NaN = unobserved.
        N: number of nodes (required, asserted truthy).
        K: number of classes (required, asserted truthy).
        alpha: concentration of the Dirichlet prior on every pi[i] (a
            scalar, or anything that broadcasts against an (N, K) array).
        iters: number of sweeps.
        burnin, skip: accepted but not used by this function.

    Returns:
        (pis, rs, ss, Ws, probs).  pis, rs, ss and Ws are lists holding the
        initial draw followed by one sample per sweep (iters + 1 entries).
        r and s are float arrays whose diagonal is left as produced by
        nans(...).  probs[it] is Ber + Cat + Dir for the state after sweep
        `it`, with Dir evaluated under the prior alpha.
    """
    # NaN never compares equal to anything, so "y[i,j] != nan" is always
    # True; isnan is the only reliable test for unobserved entries.
    from numpy import isnan

    print('--- Gibbs Sampling ---')
    assert N and K
    probs = nans(iters)

    pairs = [(a,b) for (a,b) in cross(N,N) if a != b]
    missing = isnan(y)

    def likelihood(W, i, j, k, l):
        # p(y[i,j] | r=k, s=l); an unobserved pair carries no information.
        if missing[i,j]:
            return 1.0
        return W[k,l] if y[i,j] else 1-W[k,l]

    # Init: sample from priors
    # dirichlet prior (kept separate from the per-sweep posterior counts)
    alpha0 = alpha * ones((N,K),dtype=int)
    pi_ = ones((N,K))
    for i in range(N):
        pi_[i] = sample.dirichlet(alpha0[i])
    pi = pi_.copy()

    # beta prior
    W_ = sample.beta(1,1, size=(K,K))
    W = W_.copy()

    # categorical priors: r follows the sender's pi, s the receiver's
    r_, s_ = nans((N,N)), nans((N,N))
    for (i,j) in pairs:
        r_[i,j] = sample.categorical(pi[i])
        s_[i,j] = sample.categorical(pi[j])
    r, s = r_.copy(), s_.copy()

    pis,Ws,rs,ss = [pi],[W],[r],[s]
    # Iter: sample from posteriors
    for it in range(iters):
        if (it+1)%100==0: print('- %d -' % (it+1))

        # pi[i] | r, s : prior plus role counts of the CURRENT r and s only.
        # (Adding into a persistent alpha would condition on every past
        # sweep.)  Node i's roles live in row i of r and column i of s.
        alpha_post = alpha0.copy()
        for i in range(N):
            for k in range(K):
                alpha_post[i,k] += (r[i,:]==k).sum() + (s[:,i]==k).sum()
            pi_[i] = sample.dirichlet(alpha_post[i])

        # W | y, r, s : Beta(1,1) prior plus edge / non-edge counts per role
        # pair.  NaN entries of y, r and s match none of the comparisons.
        h,t = ones((K,K)), ones((K,K))
        for (k,l) in cross(K,K):
            roles = (r==k) * (s==l)
            h[k,l] += ((y==1) * roles).sum()
            t[k,l] += ((y==0) * roles).sum()
        W_ = sample.beta(h,t, size=(K,K))

        # r[i,j] | y, pi, W, s
        r_ = nans((N,N))
        for (i,j) in pairs:
            l = int(s[i,j])     # roles are stored as floats; index with ints
            rP = array([pi[i][k] * likelihood(W, i, j, k, l)
                        for k in range(K)])
            rP /= rP.sum()
            r_[i,j] = sample.categorical(rP)

        # s[i,j] | y, pi, W, r
        s_ = nans((N,N))
        for (i,j) in pairs:
            k = int(r[i,j])
            sP = array([pi[j][l] * likelihood(W, i, j, k, l)
                        for l in range(K)])
            sP /= sP.sum()
            s_[i,j] = sample.categorical(sP)

        pi = pi_.copy()
        W  = W_.copy()
        r  = r_.copy()
        s  = s_.copy()

        pis.append(pi)
        Ws.append(W)
        rs.append(r)
        ss.append(s)

        # compute log-probability; should (non-monotonically) increase
        Ber, Cat = 0.0, 0.0
        for (i,j) in pairs:
            k, l = int(r[i,j]), int(s[i,j])
            Cat += log(pi[i][k]) + log(pi[j][l])
            if not missing[i,j]:
                Ber += log(W[k,l] if y[i,j] else 1-W[k,l])
        Dir = 0.0
        for i in range(N):
            Dir -= log_Beta(alpha0[i])
            for k in range(K):
                Dir += (alpha0[i][k]-1) * log(pi[i][k])
        probs[it] = Ber + Cat + Dir

    return pis,rs,ss,Ws, probs