def gibbs(y, N=None,K=None,alpha=1, iters=500,burnin=0,skip=0, truth=None):
    """
    Gibbs sampling for a stochastic block model.

    Notation: "x" means "current x", "x_" means "next x".  Within one sweep
    every conditional is drawn from the *current* state (pi, W, z); the new
    values are committed together at the end of the sweep.

        N  = |nodes|
        K  = |classes|
        pi : [0,1]^K           pi[k]  : [0,1]     class proportions
        z  : {0..K-1}^N        z[i]   : class index of node i
        W  : [0,1]^(K x K)     W[k,l] : [0,1]     edge probability k -> l
        y  : indexed as y[i,j] with entries in {0, 1, nan};
             nan marks an unobserved pair, the diagonal is ignored.

    Parameters:
        y      -- observations, indexed as y[i, j].
        N, K   -- number of nodes / classes (both required, non-zero).
        alpha  -- Dirichlet prior concentration for pi; a scalar or a
                  length-K vector (default 1 = uniform prior).
        iters  -- number of Gibbs sweeps.
        burnin, skip -- only validated against `iters`; every sweep is
                  returned, no burn-in or thinning is applied here.
        truth  -- reference labelling passed to `rand_index` (required).

    Returns:
        (pis, zs, Ws, probs, rands) where pis/zs/Ws are lists holding the
        initial draw followed by one sample per sweep (length iters + 1),
        probs[it] is the joint log-probability after sweep `it`, and
        rands[it] is rand_index(truth, z) after sweep `it`.

    NOTE(review): a second function named `gibbs` is defined later in this
    file and shadows this one once the module is loaded.
    """
    import numpy as np  # local import keeps this block self-contained

    print('--- Gibbs Sampling ---')

    assert N and K
    assert 0 <= burnin < iters and 0 <= skip < iters
    assert truth is not None

    probs, rands = np.zeros(iters), np.zeros(iters)
    true_z = truth

    # Prior hyper-parameters.  These are never mutated: each sweep's
    # posterior is "prior + counts from the current state", not a running
    # total over all previous sweeps.
    alpha_prior = alpha * np.ones(K)
    h_prior, t_prior = np.ones((K, K)), np.ones((K, K))

    # Observed off-diagonal pairs, computed once because y never changes.
    # NaN never compares equal to anything (itself included), so a
    # `!= nan` test cannot detect missing entries; isnan is required.
    pairs = [(i, j) for (i, j) in cross(N, N)
             if i != j and not np.isnan(y[i, j])]
    observed = np.zeros((N, N), dtype=bool)
    for (i, j) in pairs:
        observed[i, j] = True

    # Init: sample from priors
    pi = sample.dirichlet(alpha_prior)
    W = sample.beta(1, 1, size=(K, K))
    z = nans(N, dtype=int)
    for i in range(N):
        z[i] = sample.categorical(pi)

    pis, Ws, zs = [pi], [W], [z]

    # Iter: sample from posteriors
    for it in range(iters):
        if (it + 1) % 100 == 0:
            print('- %d -' % (it + 1))

        # pi | z  ~  Dirichlet(alpha + class counts)
        counts = np.array([np.sum(z == k) for k in range(K)])
        pi_ = sample.dirichlet(alpha_prior + counts)

        # W[k,l] | y, z  ~  Beta(1 + #edges, 1 + #non-edges) between the
        # classes; a single pass over the observed pairs fills all blocks.
        h, t = h_prior.copy(), t_prior.copy()
        for (i, j) in pairs:
            if y[i, j] == 1:
                h[z[i], z[j]] += 1
            elif y[i, j] == 0:
                t[z[i], z[j]] += 1
        W_ = sample.beta(h, t, size=(K, K))

        # z[i] | y, pi, W, z[-i]: prior weight times the likelihood of all
        # observed out-going (i -> j) and in-coming (j -> i) pairs of node i.
        z_ = z.copy()
        for i in range(N):
            zP = np.empty(K)
            for zi in range(K):
                p = pi[zi]
                for j in range(N):
                    if j == i:
                        continue
                    if observed[i, j]:
                        w = W[zi, z[j]]
                        p *= w if y[i, j] else 1 - w
                    if observed[j, i]:
                        w = W[z[j], zi]
                        p *= w if y[j, i] else 1 - w
                zP[zi] = p
            zP /= zP.sum()
            z_[i] = sample.categorical(zP)

        # Commit the sweep.
        pi, W, z = pi_, W_, z_
        pis.append(pi)
        Ws.append(W)
        zs.append(z)

        # compute log-probability; should (non-monotonically) increase.
        # The Dirichlet term uses the prior; the Beta(1,1) prior on W
        # contributes a constant 0 and is omitted.
        Ber = np.sum([np.log(W[z[i], z[j]] if y[i, j] else 1 - W[z[i], z[j]])
                      for (i, j) in pairs])
        Cat = np.sum([np.log(pi[z[i]]) for i in range(N)])
        Dir = (np.sum([(alpha_prior[k] - 1) * np.log(pi[k]) for k in range(K)])
               - log_Beta(alpha_prior))
        probs[it] = Ber + Cat + Dir

        # compute rand_index by samples
        rands[it] = rand_index(true_z, z)

    return pis,zs,Ws, probs,rands
def gibbs(y, N=None,K=None,alpha=1, iters=500,burnin=0,skip=0):
    """
    Gibbs sampling for a block model with per-node class memberships.

    Every node i has its own membership vector pi[i]; every ordered pair
    (i, j), i != j, has a sender class r[i,j] drawn from pi[i] and a
    receiver class s[i,j] drawn from pi[j]; the edge y[i,j] is Bernoulli
    with probability W[r[i,j], s[i,j]].

    Notation: "x" means "current x", "x_" means "next x".  Within one sweep
    every conditional is drawn from the *current* state; the new values are
    committed together at the end of the sweep.

        N  = |nodes|
        K  = |classes|
        pi : [0,1]^(N x K)     pi[i]   : [0,1]^K
        r  : (N x N) array     r[i,j]  : sender class, nan on the diagonal
        s  : (N x N) array     s[i,j]  : receiver class, nan on the diagonal
        W  : [0,1]^(K x K)     W[k,l]  : [0,1]
        y  : (N x N) array with entries in {0, 1, nan};
             nan marks an unobserved pair, the diagonal is ignored.

    Parameters:
        y      -- observations; compared element-wise against (N x N)
                  arrays, so it is expected to be an (N x N) NumPy array.
        N, K   -- number of nodes / classes (both required, non-zero).
        alpha  -- Dirichlet prior concentration shared by every pi[i];
                  a scalar or a length-K vector (default 1 = uniform).
        iters  -- number of Gibbs sweeps.
        burnin, skip -- accepted but not used by this function.

    Returns:
        (pis, rs, ss, Ws, probs) where pis/rs/ss/Ws are lists holding the
        initial draw followed by one sample per sweep (length iters + 1)
        and probs[it] is the joint log-probability after sweep `it`.

    NOTE(review): this definition shares its name with an earlier `gibbs`
    in this file and shadows it once the module is loaded.
    """
    import numpy as np  # local import keeps this block self-contained

    print('--- Gibbs Sampling ---')

    assert N and K

    probs = nans(iters)

    # Prior hyper-parameters.  These are never mutated: each sweep's
    # posterior is "prior + counts from the current state", not a running
    # total over all previous sweeps.
    alpha_prior = alpha * np.ones(K)
    h_prior, t_prior = np.ones((K, K)), np.ones((K, K))

    # Pair lists are loop-invariant, so build them once.
    # NaN never compares equal to anything (itself included), so a
    # `!= nan` test cannot detect missing entries; isnan is required.
    offdiag = [(i, j) for (i, j) in cross(N, N) if i != j]
    observed = [(i, j) for (i, j) in offdiag if not np.isnan(y[i, j])]

    # Init: sample from priors
    pi = sample.dirichlet(alpha_prior, size=N)
    W = sample.beta(1, 1, size=(K, K))

    # r and s stay float arrays so the diagonal can hold nan; values are
    # cast to int wherever they are used as indices.
    r = nans((N, N))
    for (i, j) in offdiag:
        r[i, j] = sample.categorical(pi[i])

    s = nans((N, N))
    for (i, j) in offdiag:
        s[i, j] = sample.categorical(pi[j])

    pis, Ws, rs, ss = [pi], [W], [r], [s]

    # Iter: sample from posteriors
    for it in range(iters):
        if (it + 1) % 100 == 0:
            print('- %d -' % (it + 1))

        # pi[i] | r, s  ~  Dirichlet(alpha + how often node i acted as
        # class k, as sender (row i of r) or as receiver (column i of s)).
        pi_ = pi.copy()
        for i in range(N):
            counts = np.array([np.sum(r[i, :] == k) + np.sum(s[:, i] == k)
                               for k in range(K)])
            pi_[i] = sample.dirichlet(alpha_prior + counts)

        # W[k,l] | y, r, s  ~  Beta(1 + #edges, 1 + #non-edges) among pairs
        # assigned to (k, l).  nan entries of y match neither 0 nor 1.
        h, t = h_prior.copy(), t_prior.copy()
        for (k, l) in cross(K, K):
            in_block = (r == k) & (s == l)
            h[k, l] += np.sum((y == 1) & in_block)
            t[k, l] += np.sum((y == 0) & in_block)
        W_ = sample.beta(h, t, size=(K, K))

        # r[i,j] | y, pi, W, s[i,j]
        r_ = nans((N, N))
        for (i, j) in offdiag:
            l = int(s[i, j])
            if np.isnan(y[i, j]):
                # Unobserved pair: no likelihood term, prior only.
                rP = np.array(pi[i], dtype=float)
            else:
                rP = pi[i] * (W[:, l] if y[i, j] else 1 - W[:, l])
            rP /= rP.sum()
            r_[i, j] = sample.categorical(rP)

        # s[i,j] | y, pi, W, r[i,j]
        s_ = nans((N, N))
        for (i, j) in offdiag:
            k = int(r[i, j])
            if np.isnan(y[i, j]):
                sP = np.array(pi[j], dtype=float)
            else:
                sP = pi[j] * (W[k, :] if y[i, j] else 1 - W[k, :])
            sP /= sP.sum()
            s_[i, j] = sample.categorical(sP)

        # Commit the sweep.
        pi, W, r, s = pi_, W_, r_, s_
        pis.append(pi)
        Ws.append(W)
        rs.append(r)
        ss.append(s)

        # compute log-probability; should (non-monotonically) increase.
        # The Dirichlet term uses the prior; the Beta(1,1) prior on W
        # contributes a constant 0 and is omitted.
        Ber = np.sum([np.log(W[int(r[i, j]), int(s[i, j])] if y[i, j]
                             else 1 - W[int(r[i, j]), int(s[i, j])])
                      for (i, j) in observed])
        Cat = np.sum([np.log(pi[i][int(r[i, j])]) + np.log(pi[j][int(s[i, j])])
                      for (i, j) in offdiag])
        Dir = np.sum([np.sum([(alpha_prior[k] - 1) * np.log(pi[i][k])
                              for k in range(K)])
                      - log_Beta(alpha_prior)
                      for i in range(N)])
        probs[it] = Ber + Cat + Dir

    return pis,rs,ss,Ws, probs