def f_train_q():
    # Collect only the inference-network (posterior, '_q_') parameters
    keys_q = [key for key in w if '_q_' in key]
    train_cost = f_encode_decode(w)['cost']
    # AdaMax step restricted to the posterior parameters; no averaged copy is kept
    updates = G.misc.optim.AdaMaxAvg([w], None, train_cost, alpha=-alpha,
                                     beta1=beta1, beta2=beta2,
                                     update_keys=keys_q,
                                     disconnected_inputs='ignore')
    updates = postup(updates, w)
    return G.function({'x': x}, train_cost, updates=updates, lazy=lazy)
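# Illustrative usage sketch (hypothetical): f_train_q compiles a function that
# takes one AdaMax step on the inference-network parameters only, leaving the
# generative parameters fixed. The keyword call convention is assumed from the
# G.function input dict:
#
#   train_q = f_train_q()
#   cost = train_q(x=minibatch)   # minibatch: uint8 images, shape (n, c, h, w)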
def f_decode():
    # One noise input per stochastic unit; spatial size halves at each depth level
    eps = {}
    for i in range(len(depths)):
        for j in range(depths[i]):
            name = 'eps_' + str(i) + '_' + str(j)
            eps[name] = T.tensor4(name)
            eps[name].tag.test_value = np.random.randn(
                n_batch_test, n_z,
                shape_x[1] / 2**(i + 1),
                shape_x[2] / 2**(i + 1)).astype(floatX)
    image = f_decoder(eps, w_avg)
    return G.function(eps, image, lazy=lazy)
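# Sketch of the noise inputs the compiled decoder expects: one tensor per
# stochastic unit, with spatial size halved at every depth level. Hypothetical;
# `n_batch` and the keyword call convention are assumptions:
#
#   decode = f_decode()
#   eps = {}
#   for i in range(len(depths)):
#       for j in range(depths[i]):
#           eps['eps_%d_%d' % (i, j)] = np.random.randn(
#               n_batch, n_z,
#               shape_x[1] / 2**(i + 1),
#               shape_x[2] / 2**(i + 1)).astype(floatX)
#   images = decode(**eps)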
def f_train():
    if optim == 'adamax':
        train_cost = f_encode_decode(w)['cost']
        # AdaMax step on all parameters, keeping a moving average in w_avg
        updates = G.misc.optim.AdaMaxAvg([w], [w_avg], train_cost, alpha=-alpha,
                                         beta1=beta1, beta2=beta2,
                                         disconnected_inputs='ignore')
    elif optim == 'eve':
        f = lambda w: f_encode_decode(w)['cost']
        train_cost, updates = G.misc.optim.Eve(w, w_avg, f, alpha=-alpha,
                                               beta1=beta1, beta2=beta2,
                                               disconnected_inputs='ignore')
    updates = postup(updates, w)
    return G.function({'x': x}, train_cost, updates=updates, lazy=lazy)
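# Minimal training-loop sketch (hypothetical; `batches` yielding dicts with an
# 'x' array of uint8 images is an assumption):
#
#   train = f_train()
#   for batch in batches:
#       cost = train(x=batch['x'])  # one optimizer step, returns the cost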
def fcvae(shape_x, depth_model, depth_ar, n_h1, n_h2, n_z, posterior,
          px='logistic', nl='softplus', alpha=0.002, beta1=0.1, beta2=0.001,
          share_w=False, data_init=None):
    # posterior is one of: 'diag1', 'diag2', 'iaf_linear', 'iaf_nonlinear'
    _locals = locals()
    _locals.pop('data_init')
    print 'CVAE9 with ', _locals
    assert px in ['logistic', 'bernoulli']
    w = {}  # model parameters
    kernel_h = (1, 1)
    n_x = shape_x[0] * shape_x[1] * shape_x[2]

    # Input whitening
    if px == 'logistic':
        w['logsd_x'] = G.sharedf(0.)

    # 1x1 convolutions between data space and hidden space
    x_enc = N.conv.conv2d('x_enc', n_x, n_h1, (1, 1), w=w)
    x_dec = N.conv.conv2d('x_dec', n_h1, n_x, (1, 1), w=w)
    x_dec_nl = N.nonlinearity('x_dec_nl', nl, n_h1, w)

    layers = []
    for i in range(depth_model):
        name = str(i)
        if share_w:
            name = '[sharedw]' + str(i) + '[/sharedw]'
        layers.append(cvae_layer(name, posterior, n_h1, n_h2, n_z, depth_ar,
                                 False, nl, kernel_h, share_w, w))

    # Top-level value
    w['h_top'] = G.sharedf(np.random.normal(0, 0.01, size=(n_h1,)))

    # Initialize variables
    x = T.tensor4('x')
    x.tag.test_value = data_init['x']
    n_batch_test = data_init['x'].shape[0]
    _x = T.clip(x / 255., 0, 1)

    # Objective function
    def f_cost(w, train=True):
        results = {}
        h = x_enc(_x.reshape((-1, n_x, 1, 1)) - .5, w)
        obj_logpz = 0
        obj_logqz = 0
        # Bottom-up encoders
        for i in range(depth_model):
            h = layers[i].up(h, w)
        # Top-level activations
        h = T.tile(w['h_top'].dimshuffle('x', 0, 'x', 'x'),
                   (_x.shape[0], 1, 1, 1))
        # Top-down priors, posteriors and decoders
        for i in list(reversed(range(depth_model))):
            h, _obj_logqz, _obj_logpz = layers[i].down_q(h, train, w)
            obj_logqz += _obj_logqz
            obj_logpz += _obj_logpz
            results['cost_z' + str(i).zfill(3)] = _obj_logqz - _obj_logpz
        output = .1 * x_dec(x_dec_nl(h, w), w).reshape(
            (-1, shape_x[0], shape_x[1], shape_x[2]))
        # Data likelihood p(x|z)
        if px == 'logistic':
            mean_x = T.clip(output, -.5, .5)
            logsd_x = 0 * mean_x + w['logsd_x']
            obj_logpx = N.rand.discretized_logistic(mean_x, logsd_x,
                                                    1 / 255., _x - .5).logp
            obj = obj_logpz - obj_logqz + obj_logpx
            # Convert to bits per pixel
            obj *= (1. / np.prod(shape_x) * 1. / np.log(2.)).astype('float32')
        elif px == 'bernoulli':
            prob_x = T.nnet.sigmoid(output)
            prob_x = T.minimum(prob_x, 1 - 1e-7)
            prob_x = T.maximum(prob_x, 1e-7)
            obj_logpx = N.rand.bernoulli(prob_x, _x).logp
            obj = obj_logpz - obj_logqz + obj_logpx
        results['cost_x'] = -obj_logpx
        results['cost'] = -obj
        return results

    # Turns Gaussian noise 'eps' into a sample
    def f_decoder(eps, w):
        # Top-level activations
        h = T.tile(w['h_top'].dimshuffle('x', 0, 'x', 'x'),
                   (eps['eps_0'].shape[0], 1, 1, 1))
        # Top-down priors and decoders
        for i in list(reversed(range(depth_model))):
            h = layers[i].down_p(h, eps['eps_' + str(i)], w)
        output = .1 * x_dec(x_dec_nl(h, w), w).reshape(
            (-1, shape_x[0], shape_x[1], shape_x[2]))
        if px == 'logistic':
            mean_x = T.clip(output + .5, 0, 1)
        elif px == 'bernoulli':
            mean_x = T.nnet.sigmoid(output)
        image = (255. * T.clip(mean_x, 0, 1)).astype('uint8')
        return image

    def f_eps(n_batch, w):
        # One standard-normal noise tensor per stochastic layer
        eps = {}
        for i in range(depth_model):
            eps['eps_' + str(i)] = G.rng_curand.normal((n_batch, n_z, 1, 1),
                                                       dtype=floatX)
        return eps

    def postup(updates, w):
        nodes = [x_enc, x_dec]
        for n in nodes:
            updates = n.postup(updates, w)
        for i in range(depth_model):
            updates = layers[i].postup(updates, w)
        return updates

    # Data-dependent initialization
    if data_init is not None:
        w['__init'] = OrderedDict()
        f_cost(w)
        w.pop('__init')

    # Compile training function
    results = f_cost(w)
    updates, (w_avg,) = G.misc.optim.AdaMaxAvg([w], results['cost'],
                                               alpha=-alpha, beta1=beta1,
                                               beta2=beta2,
                                               disconnected_inputs='ignore')
    updates = postup(updates, w)
    f_train = G.function({'x': x}, results['cost'], updates=updates)

    # Compile evaluation function (uses the averaged parameters w_avg)
    results = f_cost(w_avg, False)
    f_eval = G.function({'x': x}, results)

    # Compile epsilon-generating function
    n_batch = T.lscalar()
    n_batch.tag.test_value = 16
    eps = f_eps(n_batch, w)
    f_eps = G.function({'n_batch': n_batch}, eps)

    # Compile sampling function
    eps = {}
    for i in range(depth_model):
        name = 'eps_' + str(i)
        eps[name] = T.tensor4(name)
        eps[name].tag.test_value = np.random.randn(
            n_batch_test, n_z, 1, 1).astype(floatX)
    image = f_decoder(eps, w_avg)
    f_decode = G.function(eps, image)

    return G.Struct(train=f_train, eval=f_eval, decode=f_decode, eps=f_eps,
                    w=w, w_avg=w_avg)
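# Illustrative end-to-end sketch of the Struct returned by fcvae. The helper
# `_demo_fcvae`, the hyperparameter values, and the keyword call convention of
# the compiled functions are assumptions, not part of the original experiments.
def _demo_fcvae():
    # Random uint8-range values stand in for a real image batch
    init_batch = np.random.randint(0, 256, size=(16, 3, 32, 32)).astype(floatX)
    model = fcvae(shape_x=(3, 32, 32), depth_model=4, depth_ar=1,
                  n_h1=64, n_h2=64, n_z=32, posterior='diag1',
                  data_init={'x': init_batch})
    cost = model.train(x=init_batch)   # one AdaMax step on a minibatch
    eps = model.eps(n_batch=16)        # one noise tensor per stochastic layer
    images = model.decode(**eps)       # uint8 samples decoded from the prior
    return cost, images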
def f_eps_():
    n_batch = T.lscalar()
    n_batch.tag.test_value = 16
    eps = f_eps(n_batch, w)
    return G.function({'n_batch': n_batch}, eps, lazy=lazy)
def f_eval():
    # Evaluation uses the moving-average parameters w_avg, with train=False
    results = f_encode_decode(w_avg, False)
    return G.function({'x': x}, results)
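# Evaluation sketch (hypothetical): average the per-datapoint cost over
# held-out data; with the logistic likelihood the cost is already scaled to
# bits per pixel, as in fcvae's f_cost above:
#
#   evaluate = f_eval()
#   results = evaluate(x=test_x)
#   print 'cost:', results['cost'].mean()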