Example #1
# Depends on the source repository's local helper modules `hmc` and `ndict`.
def step_hmc_wz(model, x, z, hmc_stepsize=1e-2, hmc_steps=20):
    print('step_hmc_wz', hmc_stepsize, hmc_steps)

    n_batch = next(iter(x.values())).shape[1]

    # Separate auto-tuned HMC samplers for the latents z and the weights w
    hmc_dostep_z = hmc.hmc_step_autotune(n_steps=hmc_steps, init_stepsize=hmc_stepsize)
    hmc_dostep_w = hmc.hmc_step_autotune(n_steps=hmc_steps, init_stepsize=hmc_stepsize)

    def dostep(w):

        # Log-joint log p(x, z | w) and its gradient w.r.t. z, at fixed w
        def fgrad_z(_z):
            logpx, logpz, gw, gz = model.dlogpxz_dwz(w, x, _z)
            return logpx + logpz, gz

        # One auto-tuned HMC step on z
        logpxz, acceptRate, stepsize = hmc_dostep_z(fgrad_z, z)

        shapes_w = ndict.getShapes(w)

        # Reshape each entry of a dict of arrays into a column vector.
        # For contiguous arrays, reshape returns views, so in-place HMC
        # updates to the vectorized dict propagate back to the original.
        def vectorize(d):
            v = {}
            for i in d:
                v[i] = d[i].reshape((d[i].size, -1))
            return v

        # Log-joint and its (vectorized) gradient w.r.t. w, at fixed z
        def fgrad_w(_w):
            _w = ndict.setShapes(_w, shapes_w)
            logpx, logpz, gw, gz = model.dlogpxz_dwz(_w, x, z)
            gw = vectorize(gw)
            return logpx + logpz, gw

        # One auto-tuned HMC step on w
        _w = vectorize(w)
        hmc_dostep_w(fgrad_w, _w)

        return z.copy(), logpxz.copy()

    return dostep
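
step_hmc_wz returns a closure that, per call, runs one auto-tuned HMC sweep over the latents z and one over the weights w. Below is a minimal, hypothetical driver loop; `model`, `w`, `x`, and `z` are illustrative stand-ins for a model object and ndict-style dicts of NumPy arrays, not names taken from the source repository.

# Illustrative sketch only: `model`, `w`, `x`, `z` are assumed stand-ins.
dostep = step_hmc_wz(model, x, z, hmc_stepsize=1e-2, hmc_steps=20)
for it in range(1000):
    z_sample, logpxz = dostep(w)    # one HMC sweep over z, then over w
    if it % 100 == 0:
        print(it, logpxz.mean())    # monitor the log-joint estimate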
Example #2
def step_batch_mcem(model_p,
                    x,
                    z_mcmc,
                    dostep_m,
                    hmc_stepsize=1e-2,
                    hmc_steps=20,
                    m_steps=5):
    print('Batch MCEM', hmc_stepsize, hmc_steps, m_steps)

    n_batch = next(iter(x.values())).shape[1]

    hmc_dostep = hmc.hmc_step_autotune(n_steps=hmc_steps,
                                       init_stepsize=hmc_stepsize)

    def doStep(w):
        # Log-joint log p(x, z | w) and its gradient w.r.t. z, at fixed w
        def fgrad(_z):
            logpx, logpz, gw, gz = model_p.dlogpxz_dwz(w, x, _z)
            return logpx + logpz, gz

        # E-step: refresh the MCMC sample of z with one auto-tuned HMC step
        logpxz, acceptRate, stepsize = hmc_dostep(fgrad, z_mcmc)

        # M-step: several optimizer updates of w given the sampled z
        for i in range(m_steps):
            dostep_m(w, z_mcmc)

        return z_mcmc.copy(), logpxz.copy()

    return doStep
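
Here `dostep_m` is a user-supplied M-step callback. A hedged sketch of one possible wiring follows, where the callback performs a single plain gradient-ascent update of w on the log-joint; every name (`model_p`, `x`, `z_mcmc`, `w`) and the 1e-3 stepsize are illustrative assumptions, not part of the source.

# Illustrative sketch only: a minimal plain-gradient M-step callback.
def dostep_m(w, z):
    logpx, logpz, gw, gz = model_p.dlogpxz_dwz(w, x, z)
    for i in gw:
        w[i] += 1e-3 * gw[i]        # gradient ascent on log p(x, z | w)

step = step_batch_mcem(model_p, x, z_mcmc, dostep_m, m_steps=5)
z_mcmc, logpxz = step(w)            # one E-step (HMC on z), then five M-steps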
Example #3
# Depends on `hmc`, `ndict`, and NumPy (as np) from the source repository.
def step_pvem(model_q,
              model_p,
              x,
              w_q,
              n_batch=100,
              ada_stepsize=1e-1,
              warmup=100,
              reg=1e-8,
              convertImgs=False):
    print('Predictive VEM', ada_stepsize)

    # hmc_steps = 0, so the optional HMC refinement of z below is disabled
    hmc_steps = 0
    hmc_dostep = hmc.hmc_step_autotune(n_steps=hmc_steps, init_stepsize=1e-1)

    # AdaGrad: accumulators of squared gradients, one per weight dict
    gw_q_ss = ndict.cloneZeros(w_q)
    gw_p_ss = ndict.cloneZeros(model_p.init_w())

    nsteps = [0]

    do_adagrad = True

    def doStep(w_p):
        # Draw a random minibatch of columns from x
        n_tot = next(iter(x.values())).shape[1]
        idx_minibatch = np.random.randint(0, n_tot, n_batch)
        x_minibatch = {i: x[i][:, idx_minibatch] for i in x}
        if convertImgs:
            x_minibatch = {i: x_minibatch[i] / 256. for i in x_minibatch}

        # step 1A: sample z ~ q(z|x) from the recognition model model_q
        _, z, _ = model_q.gen_xz(w_q, x_minibatch, {}, n_batch)

        # step 1B: optionally refine z with HMC (inactive while hmc_steps == 0;
        # note the argument order here differs from the (w, x, z) order used
        # in the other examples)
        def fgrad(_z):
            logpx, logpz, gw, gz = model_p.dlogpxz_dwz(w_p, _z, x_minibatch)
            return logpx + logpz, gz

        if hmc_steps > 0:
            logpxz, _, _ = hmc_dostep(fgrad, z)

        # AdaGrad ascent: scale each gradient by the root of its accumulated
        # squared magnitude; parameter updates start only after the warmup
        def optimize(w, gw, gw_ss, stepsize):
            if do_adagrad:
                for i in gw:
                    gw_ss[i] += gw[i]**2
                    if nsteps[0] > warmup:
                        w[i] += stepsize / np.sqrt(gw_ss[i] + reg) * gw[i]
            else:
                for i in gw:
                    w[i] += 1e-4 * gw[i]

        # step 2: use z to update the generative model model_p
        logpx_p, logpz_p, gw_p, gz_p = model_p.dlogpxz_dwz(w_p, x_minibatch, z)
        _, gw_prior = model_p.dlogpw_dw(w_p)
        gw = {i: gw_p[i] + float(n_batch) / n_tot * gw_prior[i] for i in gw_p}
        optimize(w_p, gw, gw_p_ss, ada_stepsize)

        # step 3: use gradients of model_p w.r.t. z to update model_q
        _, logpz_q, fd, gw_q = model_q.dfd_dw(w_q, x_minibatch, z, gz_p)
        _, gw_prior = model_q.dlogpw_dw(w_q)
        gw = {i: -gw_q[i] + float(n_batch) / n_tot * gw_prior[i] for i in gw_q}
        optimize(w_q, gw, gw_q_ss, ada_stepsize)

        nsteps[0] += 1

        return z.copy(), logpx_p + logpz_p - logpz_q

    return doStep
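
The `optimize` helper implements the AdaGrad ascent rule w += eta * g / sqrt(sum(g^2) + eps). A self-contained toy sketch of that update in isolation follows; the quadratic objective, stepsize, and epsilon are illustrative choices, not values from the source.

# Standalone AdaGrad ascent on a toy quadratic, mirroring `optimize` above.
import numpy as np

w = np.zeros(3)                     # parameters
g_ss = np.zeros(3)                  # accumulated squared gradients
eta, eps = 1e-1, 1e-8
for _ in range(100):
    g = 1.0 - w                     # gradient of -0.5 * ||w - 1||^2
    g_ss += g**2
    w += eta / np.sqrt(g_ss + eps) * g
print(w)                            # converges toward [1. 1. 1.]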