def factors(self, w, x, z, A):
    """Return the joint log-probability factors of the sequence model.

    Parameters:
        w: dict of symbolic weight matrices / bias column vectors.
        x: dict of observations keyed 'x0' .. 'x{n_steps-1}'.
        z: dict of latent states keyed 'z0' .. 'z{n_steps-1}'.
        A: ones matrix used to tile column-vector biases across the
           minibatch — presumably shape (1, n_batch); TODO confirm.

    Returns:
        (logpw, logpx, logpz, {}): logpw is a scalar (log-prior of the
        weights); logpx and logpz are row vectors with one element per
        minibatch sample.
    """
    # Observation likelihood log p(x_i | z_i), chosen by the configured
    # observation model.
    if self.data == 'binary':
        def f_xi(zi, xi):
            # pi = p(X_i=1): Bernoulli mean from a linear map of z_i
            pi = T.nnet.sigmoid(T.dot(w['wx'], zi) + T.dot(w['bx'], A))
            # logpxi = log p(X_i = x_i), summed over observed dimensions
            logpxi = -T.nnet.binary_crossentropy(pi, xi).sum(
                axis=0, keepdims=True)
            #logpxi = T.log(pi*xi+(1-pi)*(1-xi)).sum(axis=0, keepdims=True)
            return logpxi
    elif self.data == 'gaussian':
        def f_xi(zi, xi):
            x_mean = T.dot(w['wx'], zi) + T.dot(w['bx'], A)
            # 2 * logsd == log-variance, as expected by normal2()
            x_logvar = T.dot(2 * w['logsdx'], A)
            return ap.logpdfs.normal2(xi, x_mean, x_logvar).sum(
                axis=0, keepdims=True)
    else:
        # Fix: was a bare `raise Exception()` with no message; include the
        # offending value so misconfiguration is diagnosable. The exception
        # type is kept unchanged for caller compatibility.
        raise Exception("Unknown data type: %r (expected 'binary' or "
                        "'gaussian')" % (self.data,))

    # Factors of X and Z
    logpx = 0
    logpz = 0
    sd = T.dot(T.exp(w['logsd']), A)
    for i in range(self.n_steps):
        if i == 0:
            # Prior on the initial state: p(z_0) is standard normal
            logpz += logpdfs.standard_normal(z['z' + str(i)]).sum(
                axis=0, keepdims=True)
        if i > 0:
            # Transition p(z_i | z_{i-1}): normal around a tanh map
            mean = T.tanh(T.dot(w['wz'], z['z' + str(i - 1)])
                          + T.dot(w['bz'], A))
            logpz += logpdfs.normal(z['z' + str(i)], mean, sd).sum(
                axis=0, keepdims=True)
        logpxi = f_xi(z['z' + str(i)], x['x' + str(i)])
        logpx += logpxi

    # joint() = logp(x,z,w) = logp(x|z) + logp(z) + logp(w) + C
    # This is a proper scalar function
    logpw = 0
    for i in w:
        logpw += logpdfs.normal(w[i], 0, self.prior_sd).sum()  # logp(w)

    return logpw, logpx, logpz, {}
def factors(self, w, x, z, A):
    """Return log-probability factors (logp(w), logp(x|z), logp(z), {}).

    Parameters:
        w: dict of symbolic weights and bias column vectors.
        x: dict with the observation under key 'x'.
        z: dict of latent layers keyed 'z0', 'z1', ...
        A: ones matrix that tiles column-vector biases across the
           minibatch — presumably shape (1, n_batch); TODO confirm.
    """
    # Prior on the first latent layer: log p(z0) under a standard normal.
    logpz = logpdfs.standard_normal(z['z0']).sum(axis=0)

    # Conditionals log p(z_{n+1} | z_n) chained through the hidden layers.
    z_prev = z['z0']
    for layer in range(1, len(self.n_hidden)):
        mean = T.tanh(T.dot(w['w' + str(layer)], z_prev)
                      + T.dot(w['b' + str(layer)], A))
        sd = T.dot(T.exp(w['logsd' + str(layer)]), A)
        logpz += logpdfs.normal(z['z' + str(layer)], mean, sd).sum(axis=0)
        z_prev = z['z' + str(layer)]

    z_basis = T.tanh(T.dot(w['wbasis'], z_prev) + T.dot(w['bbasis'], A))

    # Observation model log p(x | z_last). The sigmoid output is squashed
    # into [1e-3, 1 - 1e-3], which keeps the cross-entropy finite.
    p = 1e-3 + (1 - 2e-3) * T.nnet.sigmoid(
        T.dot(w['wout'], z_basis) + T.dot(w['bout'], A))  # p = p(X=1)
    logpx = -T.nnet.binary_crossentropy(p, x['x'])
    # Sum over the output dimension via a ones row vector, keeping a
    # (1, n_batch) row shape.
    logpx = T.dot(np.ones((1, self.n_output)), logpx)

    # joint() = logp(x,z,w) = logp(x,z|w) + logp(w) = logpxz + logp(w) + C
    # This is a proper scalar function
    logpw = 0
    for key in w:
        logpw += logpdfs.normal(w[key], 0, self.prior_sd).sum()  # logp(w)

    return logpw, logpx, logpz, {}
def factors(self, w, x, z, A):
    """Return the joint log-probability factors of the sequence model.

    Parameters:
        w: dict of symbolic weight matrices / bias column vectors.
        x: dict of observations keyed 'x0' .. 'x{n_steps-1}'.
        z: dict of latent states keyed 'z0' .. 'z{n_steps-1}'.
        A: ones matrix used to tile column-vector biases across the
           minibatch — presumably shape (1, n_batch); TODO confirm.

    Returns:
        (logpw, logpx, logpz, {}): logpw is a scalar weight log-prior;
        logpx and logpz are row vectors, one element per sample.
    """
    # Observation likelihood log p(x_i | z_i), selected by data type.
    if self.data == 'binary':
        def f_xi(zi, xi):
            pi = T.nnet.sigmoid(T.dot(w['wx'], zi)
                                + T.dot(w['bx'], A))  # pi = p(X_i=1)
            logpxi = -T.nnet.binary_crossentropy(pi, xi).sum(
                axis=0, keepdims=True)  # logpxi = log p(X_i=x_i)
            #logpxi = T.log(pi*xi+(1-pi)*(1-xi)).sum(axis=0, keepdims=True)
            return logpxi
    elif self.data == 'gaussian':
        def f_xi(zi, xi):
            x_mean = T.dot(w['wx'], zi) + T.dot(w['bx'], A)
            # 2 * logsd == log-variance, the parameterization of normal2()
            x_logvar = T.dot(2 * w['logsdx'], A)
            return ap.logpdfs.normal2(xi, x_mean, x_logvar).sum(
                axis=0, keepdims=True)
    else:
        # Fix: was a bare `raise Exception()` with no message; report the
        # bad value. Exception type kept for caller compatibility.
        raise Exception("Unknown data type: %r (expected 'binary' or "
                        "'gaussian')" % (self.data,))

    # Factors of X and Z
    logpx = 0
    logpz = 0
    sd = T.dot(T.exp(w['logsd']), A)
    for i in range(self.n_steps):
        if i == 0:
            # Prior p(z_0): standard normal
            logpz += logpdfs.standard_normal(z['z' + str(i)]).sum(
                axis=0, keepdims=True)
        if i > 0:
            # Transition p(z_i | z_{i-1}): normal around a tanh map
            mean = T.tanh(
                T.dot(w['wz'], z['z' + str(i - 1)]) + T.dot(w['bz'], A))
            logpz += logpdfs.normal(z['z' + str(i)], mean, sd).sum(
                axis=0, keepdims=True)
        logpxi = f_xi(z['z' + str(i)], x['x' + str(i)])
        logpx += logpxi

    # joint() = logp(x,z,w) = logp(x|z) + logp(z) + logp(w) + C
    # This is a proper scalar function
    logpw = 0
    for i in w:
        logpw += logpdfs.normal(w[i], 0, self.prior_sd).sum()  # logp(w)

    return logpw, logpx, logpz, {}
def functions(w, z, x):
    """Build symbolic expressions logpx, logpz, logpw for the model.

    Relies on module-level n_batch, n_hidden, n_output, noMiddleEps and
    prior_sd being in scope.

    NOTE(review): both candidate computations of logpx below are disabled
    with `if False:`, so `logpx` is unbound and the return statement will
    raise NameError if this function runs as-is — presumably one branch is
    meant to be re-enabled. Both branches are preserved verbatim.
    """
    # Define symbolic program
    A = np.ones((1, n_batch))
    B = np.ones((n_batch, 1))
    C = np.ones((1, n_output))

    hidden = [z['eps0']]
    # gate switches the per-layer injected noise on/off.
    gate = 0 if noMiddleEps else 1
    for i in range(1, len(n_hidden)):
        deterministic = T.tanh(T.dot(w['w%d' % i], hidden[i - 1])
                               + T.dot(w['b' + str(i)], A))
        noise = gate * z['eps' + str(i)] * T.dot(
            T.exp(w['logsd' + str(i)]), A)
        hidden.append(deterministic + noise)

    p = 0.5 + 0.5 * T.tanh(T.dot(w['wout'], hidden[-1])
                           + T.dot(w['bout'], A))  # p = p(X=1)

    if False:
        # NOTE: code below should be correct but gives obscure Theano error
        # duplicate columns of p
        xt = T.dot(x['x'].reshape((n_output * n_batch, 1)), A).reshape(
            (n_output, n_batch * n_batch))
        # tile p
        p = p.copy().T.copy()
        p = p.reshape((1, n_output * n_batch)).copy()
        p = T.dot(B, p).reshape((n_batch * n_batch, n_output)).copy().T.copy()
        logpx = T.log(xt * p + (1 - xt) * (1 - p))
        logpx = T.dot(np.ones((1, n_output)), logpx)
        logpx = logpx.reshape((n_batch, n_batch))
        logpx_means = logpx.max(axis=1).dimshuffle(0, 'x')
        pxz = T.exp(logpx - T.dot(logpx_means, A))
        logpx = (T.log(T.dot(pxz, 1. / n_batch * B))
                 + logpx_means).reshape((1, n_batch))

    if False:
        # NOTE: this alternative method seems to work, but is super slow
        # to compile
        logpx = []
        for i in range(n_batch):
            xi = T.dot(x['x'][:, i:i + 1], A)
            logpxi = T.log(xi * p + (1 - xi) * (1 - p))
            logpxi = T.dot(np.ones((1, n_output)), logpxi)
            logpxi_max = logpxi.max()  # logpx.max(axis=1).dimshuffle(0, 'x')
            pxz = T.exp(logpxi - logpxi_max)
            logpxi = T.log(T.dot(pxz, 1. / n_batch * B)) + logpxi_max
            #T.basic.set_subtensor(logpx[0:1,i:i+1], logpxi, inplace=True)
            logpx.append(logpxi)
        logpx = T.concatenate(logpx, 1)

    # Note: logpz is a row vector (one element per sample)
    logpz = 0
    for name in z:
        logpz += logpdfs.standard_normal(z[name]).sum(axis=0)  # logp(z)

    # Note: logpw is a scalar
    logpw = 0
    for name in w:
        logpw += logpdfs.normal(w[name], 0, prior_sd).sum()  # logp(w)

    return logpx, logpz, logpw
def functions(w, z, x):
    """Construct the symbolic log-probabilities (logpx, logpz, logpw).

    Uses module-level n_batch, n_hidden, n_output, noMiddleEps, prior_sd.

    NOTE(review): every path that assigns `logpx` is guarded by
    `if False:`, so calling this function as written raises NameError at
    the return — one of the two branches presumably needs re-enabling.
    Both are kept unchanged below.
    """
    # Define symbolic program
    A = np.ones((1, n_batch))
    B = np.ones((n_batch, 1))
    C = np.ones((1, n_output))

    hidden = [z['eps0']]
    # Gate the injected per-layer noise on or off.
    gate = 0 if noMiddleEps else 1
    for layer in range(1, len(n_hidden)):
        mean_part = T.tanh(T.dot(w['w%d' % layer], hidden[layer - 1])
                           + T.dot(w['b' + str(layer)], A))
        noise_part = gate * z['eps' + str(layer)] * T.dot(
            T.exp(w['logsd' + str(layer)]), A)
        hidden.append(mean_part + noise_part)

    p = 0.5 + 0.5 * T.tanh(T.dot(w['wout'], hidden[-1])
                           + T.dot(w['bout'], A))  # p = p(X=1)

    if False:
        # NOTE: code below should be correct but gives obscure Theano error
        # duplicate columns of p
        xt = T.dot(x['x'].reshape((n_output * n_batch, 1)), A).reshape(
            (n_output, n_batch * n_batch))
        # tile p
        p = p.copy().T.copy()
        p = p.reshape((1, n_output * n_batch)).copy()
        p = T.dot(B, p).reshape(
            (n_batch * n_batch, n_output)).copy().T.copy()
        logpx = T.log(xt * p + (1 - xt) * (1 - p))
        logpx = T.dot(np.ones((1, n_output)), logpx)
        logpx = logpx.reshape((n_batch, n_batch))
        logpx_means = logpx.max(axis=1).dimshuffle(0, 'x')
        pxz = T.exp(logpx - T.dot(logpx_means, A))
        logpx = (T.log(T.dot(pxz, 1. / n_batch * B))
                 + logpx_means).reshape((1, n_batch))

    if False:
        # NOTE: this alternative method seems to work, but is super slow
        # to compile
        logpx = []
        for i in range(n_batch):
            xi = T.dot(x['x'][:, i:i + 1], A)
            logpxi = T.log(xi * p + (1 - xi) * (1 - p))
            logpxi = T.dot(np.ones((1, n_output)), logpxi)
            logpxi_max = logpxi.max()  # logpx.max(axis=1).dimshuffle(0, 'x')
            pxz = T.exp(logpxi - logpxi_max)
            logpxi = T.log(T.dot(pxz, 1. / n_batch * B)) + logpxi_max
            #T.basic.set_subtensor(logpx[0:1,i:i+1], logpxi, inplace=True)
            logpx.append(logpxi)
        logpx = T.concatenate(logpx, 1)

    # Note: logpz is a row vector (one element per sample)
    logpz = 0
    for key in z:
        logpz += logpdfs.standard_normal(z[key]).sum(axis=0)  # logp(z)

    # Note: logpw is a scalar
    logpw = 0
    for key in w:
        logpw += logpdfs.normal(w[key], 0, prior_sd).sum()  # logp(w)

    return logpx, logpz, logpw