Example #1
File: models.py Project: openai/iaf
 def f_train_q():
     # Collect the inference-network (q) parameters: their keys contain '_q_'.
     keys_q = []
     for i in w:
         if '_q_' in i: keys_q.append(i)
     train_cost = f_encode_decode(w)['cost']
     # AdaMax updates restricted to the q parameters; no averaged weights are
     # kept (second argument is None).
     updates = G.misc.optim.AdaMaxAvg([w], None, train_cost, alpha=-alpha, beta1=beta1, beta2=beta2, update_keys=keys_q, disconnected_inputs='ignore')
     updates = postup(updates, w)
     return G.function({'x':x}, train_cost, updates=updates, lazy=lazy)
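These f_* snippets are closures defined inside a larger model-building function: w, x, alpha, beta1, beta2, f_encode_decode, postup and lazy all come from the enclosing scope. A hypothetical call of the compiled function, assuming G.function's compiled callables accept their declared inputs as keyword arguments:

    train_q = f_train_q()
    cost = train_q(x=x_batch)  # x_batch: raw pixels in [0, 255], shape (n_batch, channels, rows, cols)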
Example #2
File: models.py Project: openai/iaf
 def f_decode():
     eps = {}
     # One noise input per stochastic unit; the spatial size halves at each
     # depth level. Each symbolic input gets a unique name matching its dict key.
     for i in range(len(depths)):
         for j in range(depths[i]):
             eps['eps_'+str(i)+'_'+str(j)] = T.tensor4('eps_'+str(i)+'_'+str(j))
             eps['eps_'+str(i)+'_'+str(j)].tag.test_value = np.random.randn(n_batch_test, n_z, shape_x[1]/2**(i+1), shape_x[2]/2**(i+1)).astype(floatX)
     image = f_decoder(eps, w_avg)
     return G.function(eps, image, lazy=lazy)
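The compiled function is then fed one numpy array per noise key. A hypothetical call, again assuming keyword inputs, with depths, n_z, shape_x, n_batch_test and floatX as in the enclosing model:

    decode = f_decode()
    eps_np = {}
    for i in range(len(depths)):
        for j in range(depths[i]):
            shp = (n_batch_test, n_z, shape_x[1] / 2 ** (i + 1), shape_x[2] / 2 ** (i + 1))
            eps_np['eps_%d_%d' % (i, j)] = np.random.randn(*shp).astype(floatX)
    image = decode(**eps_np)  # uint8 array of sampled images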
Example #3
File: models.py Project: openai/iaf
 def f_train():
     if optim == 'adamax':
         train_cost = f_encode_decode(w)['cost']
         # AdaMax updates on w; an exponential moving average of the weights
         # is maintained in w_avg and used at evaluation time.
         updates = G.misc.optim.AdaMaxAvg([w], [w_avg], train_cost, alpha=-alpha, beta1=beta1, beta2=beta2, disconnected_inputs='ignore')
     elif optim == 'eve':
         f = lambda w: f_encode_decode(w)['cost']
         train_cost, updates = G.misc.optim.Eve(w, w_avg, f, alpha=-alpha, beta1=beta1, beta2=beta2, disconnected_inputs='ignore')
     updates = postup(updates, w)
     return G.function({'x':x}, train_cost, updates=updates, lazy=lazy)
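For intuition, here is a minimal numpy sketch of one AdaMax step with weight averaging, the rough idea behind AdaMaxAvg. Names, signs and decay conventions are illustrative assumptions (the repo passes -alpha and small beta values, suggesting a different parameterization), not the library's implementation:

    import numpy as np

    def adamax_avg_step(w, w_avg, g, m, u, t,
                        alpha=0.002, beta1=0.9, beta2=0.999, avg_decay=0.999):
        # AdaMax (Kingma & Ba): first moment plus an infinity-norm second moment.
        m = beta1 * m + (1 - beta1) * g
        u = np.maximum(beta2 * u, np.abs(g))
        w = w - (alpha / (1 - beta1 ** t)) * m / (u + 1e-8)
        # Exponential moving average of the weights, used at evaluation time.
        w_avg = avg_decay * w_avg + (1 - avg_decay) * w
        return w, w_avg, m, u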
Example #4
File: models.py Project: openai/iaf
def fcvae(shape_x, depth_model, depth_ar, n_h1, n_h2, n_z, posterior, px='logistic', nl='softplus', alpha=0.002, beta1=0.1, beta2=0.001, share_w=False, data_init=None):
    _locals = locals()
    _locals.pop('data_init')
    print 'CVAE9 with ', _locals
    #assert posterior in ['diag1','diag2','iaf_linear','iaf_nonlinear']
    assert px in ['logistic','bernoulli']
    w = {} # model params
    
    kernel_h = (1,1)
    n_x = shape_x[0]*shape_x[1]*shape_x[2]
    
    # Input whitening
    if px == 'logistic':
        w['logsd_x'] = G.sharedf(0.)
    
    # encoder
    x_enc = N.conv.conv2d('x_enc', n_x, n_h1, (1,1), w=w)
    x_dec = N.conv.conv2d('x_dec', n_h1, n_x, (1,1), w=w)
    x_dec_nl = N.nonlinearity('x_dec_nl', nl, n_h1, w)
    
    layers = []
    for i in range(depth_model):
        name = str(i)
        if share_w:
            name = '[sharedw]'+str(i)+'[/sharedw]'
        layers.append(cvae_layer(name, posterior, n_h1, n_h2, n_z, depth_ar, False, nl, kernel_h, share_w, w))
    
    # top-level value
    #w['h_top'] = G.sharedf(np.zeros((n_h1,)))
    w['h_top'] = G.sharedf(np.random.normal(0,0.01,size=(n_h1,)))
    
    # Initialize variables. Note that data_init must be provided here: the
    # test values (and the data-dependent init below) read data_init['x'].
    x = T.tensor4('x')
    x.tag.test_value = data_init['x']
    n_batch_test = data_init['x'].shape[0]
    _x = T.clip(x / 255., 0, 1)
    
    # Objective function
    def f_cost(w, train=True):
        
        results = {}
        
        h = x_enc(_x.reshape((-1,n_x,1,1)) - .5, w)
        
        obj_logpz = 0
        obj_logqz = 0
        
        # bottom-up encoders
        for i in range(depth_model):
            h = layers[i].up(h, w)
        
        # top-level activations
        h = T.tile(w['h_top'].dimshuffle('x',0,'x','x'), (_x.shape[0],1,1,1))
        
        # top-down priors, posteriors and decoders
        for i in list(reversed(range(depth_model))):
            h, _obj_logqz, _obj_logpz = layers[i].down_q(h, train, w)
            obj_logqz += _obj_logqz
            obj_logpz += _obj_logpz
            results['cost_z'+str(i).zfill(3)] = _obj_logqz - _obj_logpz
        
        output = .1 * x_dec(x_dec_nl(h, w), w).reshape((-1,shape_x[0],shape_x[1],shape_x[2]))
        
        # empirical distribution
        if px == 'logistic':
            mean_x = T.clip(output, -.5, .5)
            logsd_x = 0*mean_x + w['logsd_x']
            obj_logpx = N.rand.discretized_logistic(mean_x, logsd_x, 1/255., _x - .5).logp
            
            obj = obj_logpz - obj_logqz + obj_logpx
            # Compute the bits per pixel
            obj *= (1./np.prod(shape_x) * 1./np.log(2.)).astype('float32')
            
        elif px == 'bernoulli':
            prob_x = T.nnet.sigmoid(output)
            prob_x = T.minimum(prob_x, 1-1e-7)
            prob_x = T.maximum(prob_x, 1e-7)
            #prob_x = T.printing.Print('prob_x')(prob_x)
            obj_logpx = N.rand.bernoulli(prob_x, _x).logp
            
            #obj_logqz = T.printing.Print('obj_logqz')(obj_logqz)
            #obj_logpz = T.printing.Print('obj_logpz')(obj_logpz)
            #obj_logpx = T.printing.Print('obj_logpx')(obj_logpx)
            obj = obj_logpz - obj_logqz + obj_logpx
            #obj = T.printing.Print('obj')(obj)
        
        results['cost_x'] = -obj_logpx
        results['cost'] = -obj
        return results
        
    # Turns Gaussian noise 'eps' into a sample 
    def f_decoder(eps, w):

        # top-level activations
        h = T.tile(w['h_top'].dimshuffle('x',0,'x','x'), (eps['eps_0'].shape[0],1,1,1))
        
        # top-down priors, posteriors and decoders
        for i in list(reversed(range(depth_model))):
            h = layers[i].down_p(h, eps['eps_'+str(i)], w)
        
        output = .1 * x_dec(x_dec_nl(h, w), w).reshape((-1,shape_x[0],shape_x[1],shape_x[2]))
        if px == 'logistic':
            mean_x = T.clip(output[:,:,:,:] + .5, 0, 1)
        elif px == 'bernoulli':
            mean_x = T.nnet.sigmoid(output)
        image = (255.*T.clip(mean_x, 0, 1)).astype('uint8')
        return image
    
    def f_eps(n_batch, w):
        eps = {}
        for i in range(depth_model):
            eps['eps_'+str(i)] = G.rng_curand.normal((n_batch,n_z,1,1),dtype=floatX)
        return eps
            
    def postup(updates, w):
        nodes = [x_enc,x_dec]
        for n in nodes:
            updates = n.postup(updates, w)
        for i in range(depth_model):
            updates = layers[i].postup(updates, w)
        
        return updates
    
    # Compile init function
    if data_init is not None:
        w['__init'] = OrderedDict()
        f_cost(w)
        w.pop('__init')
        #for i in w: print i, abs(w[i].get_value()).min(), abs(w[i].get_value()).max(), abs(w[i].get_value()).mean()
    
    # Compile training function
    results = f_cost(w)
    updates, (w_avg,) = G.misc.optim.AdaMaxAvg([w], results['cost'], alpha=-alpha, beta1=beta1, beta2=beta2, disconnected_inputs='ignore')
    #todo: replace postup with below
    #w['_updates'] = updates
    #f_cost(w)
    #updates = w.pop('_updates')
    
    updates = postup(updates, w)
    f_train = G.function({'x':x}, results['cost'], updates=updates)
    
    # Compile evaluation function
    results = f_cost(w_avg, False)
    f_eval = G.function({'x':x}, results)
    
    # Compile the epsilon-generating function. Note that this rebinds f_eps
    # from the Python helper above to the compiled Theano function.
    n_batch = T.lscalar()
    n_batch.tag.test_value = 16
    eps = f_eps(n_batch, w)
    f_eps = G.function({'n_batch':n_batch}, eps)
    
    # Compile sampling function
    eps = {}
    for i in range(depth_model):
        eps['eps_'+str(i)] = T.tensor4('eps'+str(i))
        eps['eps_'+str(i)].tag.test_value = np.random.randn(n_batch_test,n_z,1,1).astype(floatX)
    image = f_decoder(eps, w_avg)
    f_decode = G.function(eps, image)
    
    return G.Struct(train=f_train, eval=f_eval, decode=f_decode, eps=f_eps, w=w, w_avg=w_avg)
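A hypothetical end-to-end sketch of using the returned Struct. The hyperparameter values, input shapes and the keyword-argument calling convention are illustrative assumptions, not taken from the repo:

    x_init = np.random.randint(0, 256, size=(16, 3, 32, 32)).astype(floatX)
    model = fcvae(shape_x=(3, 32, 32), depth_model=2, depth_ar=1,
                  n_h1=64, n_h2=64, n_z=32, posterior='diag1',
                  data_init={'x': x_init})
    cost = model.train(x=x_batch)  # one optimizer step, returns the cost
    stats = model.eval(x=x_batch)  # dict of cost terms under averaged weights
    eps = model.eps(n_batch=16)    # draw the latent noise tensors
    images = model.decode(**eps)   # uint8 samples of shape (16,) + shape_x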
Example #5
File: models.py Project: openai/iaf
 def f_eps_():
     # Compile a function that samples the latent noise tensors for a given
     # batch size; n_batch is a symbolic scalar input.
     n_batch = T.lscalar()
     n_batch.tag.test_value = 16
     eps = f_eps(n_batch, w)
     return G.function({'n_batch':n_batch}, eps, lazy=lazy)
Example #6
File: models.py Project: openai/iaf
 def f_eval():
     # Evaluate with the averaged weights w_avg and without training-time
     # behavior (train=False), returning the dict of cost terms.
     results = f_encode_decode(w_avg, False)
     return G.function({'x':x}, results)