Example #1
 def eval(self, x, z):
     x, z = self.xz_to_theano(x, z)
     z, x = ndict.ordereddicts((z, x))
     A = self.get_A(x)
     allvars = x.values() + z.values() + [A]
     L = self.f_eval(*allvars)
     return L[0]
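A minimal usage sketch for eval (not from the source): model is assumed to be an instance of the class above, and x and z are ndict-style dictionaries of numpy arrays whose keys and shapes depend on the model's declared variables; the names and shapes below are purely illustrative.

    import numpy as np

    n_batch = 100
    # Hypothetical inputs; the actual keys must match the model's variables().
    x = {'x': np.random.binomial(n=1, p=0.5, size=(784, n_batch))}
    z = {'eps': np.random.standard_normal(size=(model.n_z, n_batch))}
    lower_bound = model.eval(x, z)  # L[0]: the evaluated lower bound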
Example #2
 def distribution(self, v, w, x, z, name):
     x, z = self.xz_to_theano(x, z)
     v, w, x, z = ndict.ordereddicts((v, w, x, z))
     A = self.get_A(x)
     allvars = list(v.values()) + list(w.values()) + list(
         x.values()) + list(z.values()) + [A]
     return self.f_dists[name](*allvars)
Example #3
 def evalAndUpdate(self, x, z):
     x, z = self.xz_to_theano(x, z)
     z, x = ndict.ordereddicts((z, x))
     A = self.get_A(x)
     allvars = list(x.values()) + list(z.values()) + [A]
     L = self.f_evalAndUpdate(*allvars)
     return L[0]
Example #4
 def evalAndUpdate(self, x, z):
     x, z = self.xz_to_theano(x, z)
     z, x = ndict.ordereddicts((z, x))
     A = self.get_A(x)
     allvars = x.values() + z.values() + [A]
     L = self.f_evalAndUpdate(*allvars)
     return L[0]
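Examples #3 and #4 differ only in the dict handling: x.values() + z.values() works under Python 2, where values() returns a list, but fails under Python 3, where it returns a view that does not support '+'. A minimal illustration:

    d1, d2 = {'a': 1}, {'b': 2}
    # Python 2: d1.values() + d2.values()  ->  [1, 2]
    # Python 3: TypeError; dict views do not support '+', so wrap explicitly:
    allvals = list(d1.values()) + list(d2.values())  # [1, 2] on both versions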
Example #5
 def eval_for_classcondition_prior(self, x, z):
     x, z = self.xz_to_theano(x, z)
     z, x = ndict.ordereddicts((z, x))
     A = self.get_A(x)
     allvars = x.values() + z.values() + [A]
     L = self.f_eval_for_classcondition_prior(*allvars)
     return L[0]
Example #6
    def dfd_dw(self, w, x, z, gz2):
        x, z = self.xz_to_theano(x, z)
        w, z, x, gz2 = ndict.ordereddicts((w, z, x, gz2))
        A = self.get_A(x)
        r = self.f_dfd_dw(*(list(w.values()) + list(x.values()) +
                            list(z.values()) + [A] + list(gz2.values())))
        logpx, logpz, fd, gw = r[0], r[1], r[2], dict(
            list(zip(list(w.keys()), r[3:3 + len(w)])))

        if ndict.hasNaN(gw):
            if True:
                print('NaN detected in gradients')
                raise Exception()
                for i in gw:
                    gw[i][np.isnan(gw[i])] = 0
            else:

                print('fd: ', fd)
                print('Values:')
                ndict.p(w)
                ndict.p(z)
                print('Gradients:')
                ndict.p(gw)
                raise Exception("dfd_dw(): NaN found in gradients")

        gw, _ = self.gwgz_to_numpy(gw, {})
        return logpx, logpz, fd, gw
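ndict.hasNaN itself is not shown in these examples; a minimal stand-in with the behavior the code relies on (True when any array in the dictionary contains a NaN) could look like this sketch:

    import numpy as np

    def has_nan(d):
        # Stand-in for ndict.hasNaN: scan every array in the dict for NaNs.
        return any(np.isnan(v).any() for v in d.values())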
Example #7
    def gen_xz(self, v, w, x, z, n_batch):

        v, w, x, z = ndict.ordereddicts((v, w, x, z))

        A = np.ones((1, n_batch))

        _z = {}

        # If x['x'] and x['y'] were given but not z['z']: generate z ~ q(z|x)
        if x.has_key('x') and x.has_key('y') and not z.has_key('z'):

            q_mean, q_logvar = self.dist_qz['z'](*([x['x'], x['y']] +
                                                   list(v.values()) + [A]))
            _z['mean'] = q_mean
            _z['logvar'] = q_logvar

            # Require epsilon
            if not z.has_key('eps'):
                z['eps'] = self.gen_eps(n_batch)['eps']

            z['z'] = q_mean + np.exp(0.5 * q_logvar) * z['eps']

        else:
            if not z.has_key('z'):
                if self.type_pz in ['gaussian', 'gaussianmarg']:
                    z['z'] = np.random.standard_normal(size=(self.n_z,
                                                             n_batch))
                elif self.type_pz == 'laplace':
                    z['z'] = np.random.laplace(size=(self.n_z, n_batch))
                elif self.type_pz == 'studentt':
                    z['z'] = np.random.standard_t(np.dot(np.exp(w['logv']), A))
            if not x.has_key('y'):
                py = self.dist_px['y'](*(list(w.values()) + [A]))
                _z['y'] = py
                x['y'] = np.zeros(py.shape)
                # np.random.multinomial requires loop. Faster possible?
                for i in range(py.shape[1]):
                    x['y'][:, i] = np.random.multinomial(n=1, pvals=py[:, i])

        # Generate from p(x|z)

        if self.type_px == 'bernoulli':
            p = self.dist_px['x'](*([x['y'], z['z']] + list(w.values()) + [A]))
            _z['x'] = p
            if not x.has_key('x'):
                x['x'] = np.random.binomial(n=1, p=p)
        elif self.type_px == 'sigmoidgaussian' or self.type_px == 'gaussian':
            x_mean, x_logvar = self.dist_px['x'](*([x['y'], z['z']] +
                                                   list(w.values()) + [A]))
            _z['x'] = x_mean
            if not x.has_key('x'):
                x['x'] = np.random.normal(x_mean, np.exp(x_logvar / 2))
                if self.type_px == 'sigmoidgaussian':
                    x['x'] = np.maximum(np.zeros(x['x'].shape), x['x'])
                    x['x'] = np.minimum(np.ones(x['x'].shape), x['x'])

        else:
            raise Exception("")

        return x, z, _z
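The comment above notes that np.random.multinomial forces a per-column loop. One vectorized alternative (a sketch, not from the source) is inverse-CDF sampling over the class probabilities:

    import numpy as np

    def sample_onehot(py):
        # py: shape (n_classes, n_batch), columns summing to 1.
        # Draw one uniform per column and invert that column's CDF;
        # equivalent in distribution to np.random.multinomial(n=1, pvals=...).
        cdf = np.cumsum(py, axis=0)
        u = np.random.uniform(size=(1, py.shape[1]))
        idx = np.minimum((u > cdf).sum(axis=0), py.shape[0] - 1)
        onehot = np.zeros_like(py)
        onehot[idx, np.arange(py.shape[1])] = 1.
        return onehot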
Example #8
 def eval_for_classcondition_prior(self, x, z):
     x, z = self.xz_to_theano(x, z)
     z, x = ndict.ordereddicts((z, x))
     A = self.get_A(x)
     allvars = x.values() + z.values() + [A]
     L = self.f_eval_for_classcondition_prior(*allvars)
     return L[0]
Example #9
    def gen_xz_prior(self, x, z, mean_prior, sigma_square, n_batch):

        x, z = ndict.ordereddicts((x, z))

        A = np.ones((1, n_batch)).astype(np.float32)
        for i in z:
            z[i] = z[i].astype(np.float32)
        for i in x:
            x[i] = x[i].astype(np.float32)
        tmp = np.random.standard_normal(size=(self.n_z,
                                              n_batch)).astype(np.float32)
        z['z'] = tmp * np.sqrt(sigma_square) + mean_prior

        if self.type_px == 'bernoulli':
            x['x'] = self.dist_px['x'](*([z['z']] + [A]))
        elif self.type_px == 'bounded01' or self.type_px == 'gaussian':
            x_mean, x_logvar = self.dist_px['x'](*([z['z']] + [A]))
            if not x.has_key('x'):
                x['x'] = np.random.normal(x_mean, np.exp(x_logvar / 2))
                if self.type_px == 'bounded01':
                    x['x'] = np.maximum(np.zeros(x['x'].shape), x['x'])
                    x['x'] = np.minimum(np.ones(x['x'].shape), x['x'])

        else:
            raise Exception("")

        return x
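A hypothetical call to gen_xz_prior (model and n_z are assumptions, not from the source): draw a batch of decoded samples around a given prior mean.

    import numpy as np

    n_batch = 64
    mean_prior = np.zeros((model.n_z, n_batch), dtype=np.float32)
    x = model.gen_xz_prior({}, {}, mean_prior, sigma_square=1.0, n_batch=n_batch)
    samples = x['x']  # decoded samples, one column per batch element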
Example #10
    def gen_xz(self, v, w, x, z, n_batch):
        
        v, w, x, z = ndict.ordereddicts((v, w, x, z))
        
        A = np.ones((1, n_batch))
        
        _z = {}

        # If x['x'] was given but not z['z']: generate z ~ q(z|x)
        if x.has_key('x') and not z.has_key('z'):

            q_mean, q_logvar = self.dist_qz['z'](*([x['x']] + v.values() + [A]))
            _z['mean'] = q_mean
            _z['logvar'] = q_logvar
            
            # Require epsilon
            if not z.has_key('eps'):
                z['eps'] = self.gen_eps(n_batch)['eps']
            
            z['z'] = q_mean + np.exp(0.5 * q_logvar) * z['eps']
            
        else:
            if not z.has_key('z'):
                if self.type_pz in ['gaussian','gaussianmarg']:
                    z['z'] = np.random.standard_normal(size=(self.n_z, n_batch))
                elif self.type_pz == 'laplace':
                    z['z'] = np.random.laplace(size=(self.n_z, n_batch))
                elif self.type_pz == 'studentt':
                    z['z'] = np.random.standard_t(np.dot(np.exp(w['logv']), A))
        
        # Generate from p(x|z)
        
        if self.type_px == 'bernoulli':
            p = self.dist_px['x'](*([z['z']] + w.values() + [A]))
            _z['x'] = p
            if not x.has_key('x'):
                x['x'] = np.random.binomial(n=1,p=p)
        elif self.type_px == 'sigmoidgaussian' or self.type_px == 'gaussian':
            x_mean, x_logvar = self.dist_px['x'](*([z['z']] + w.values() + [A]))
            _z['x'] = x_mean
            if not x.has_key('x'):
                x['x'] = np.random.normal(x_mean, np.exp(x_logvar/2))
                if self.type_px == 'sigmoidgaussian':
                    x['x'] = np.maximum(np.zeros(x['x'].shape), x['x'])
                    x['x'] = np.minimum(np.ones(x['x'].shape), x['x'])
        
        else: raise Exception("")
        
        if not x.has_key('y'):
            py = self.dist_px['y'](*([x['x']] + v.values() + w.values() + [A]))
            _z['y'] = py
            x['y'] = np.zeros(py.shape)
            # np.random.multinomial requires loop. Faster possible?
            for i in range(py.shape[1]):
                x['y'][:,i] = np.random.multinomial(n=1, pvals=py[:,i])

        return x, z, _z
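The line z['z'] = q_mean + np.exp(0.5 * q_logvar) * z['eps'] is the reparameterization trick: with eps ~ N(0, I), the result is distributed as N(q_mean, exp(q_logvar)). A quick numerical check (the tolerances are illustrative):

    import numpy as np

    q_mean, q_logvar = 1.5, np.log(4.0)   # target N(1.5, 4.0)
    eps = np.random.standard_normal(size=100000)
    z = q_mean + np.exp(0.5 * q_logvar) * eps
    assert abs(z.mean() - 1.5) < 0.05
    assert abs(z.var() - 4.0) < 0.1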
Example #11
    def gen_xz(self, x, z, n_batch):
        
        x, z = ndict.ordereddicts((x, z))
        
        A = np.ones((1, n_batch)).astype(np.float32)
        for i in z: z[i] = z[i].astype(np.float32)
        for i in x: x[i] = x[i].astype(np.float32)
        
        _z = {}

        # If x['x'] was given but not z['z']: generate z ~ q(z|x)
        if 'x' in x and 'z' not in z:

            q_mean, q_logvar = self.dist_qz['z'](*([x['x']] + [A]))
            _z['mean'] = q_mean
            _z['logvar'] = q_logvar
            
            # Require epsilon: reuse z['eps'] if the caller supplied it,
            # otherwise sample fresh noise
            eps = z['eps'] if 'eps' in z else self.gen_eps(n_batch)['eps']

            z['z'] = q_mean + np.exp(0.5 * q_logvar) * eps
            
        elif 'z' not in z:
            if self.type_pz in ['gaussian','gaussianmarg']:
                z['z'] = np.random.standard_normal(size=(self.n_z, n_batch)).astype(np.float32)
            elif self.type_pz == 'laplace':
                z['z'] = np.random.laplace(size=(self.n_z, n_batch)).astype(np.float32)
            elif self.type_pz == 'studentt':
                z['z'] = np.random.standard_t(np.dot(np.exp(self.w['logv'].get_value()), A)).astype(np.float32)
            elif self.type_pz == 'mog':
                i = np.random.randint(self.n_mixture)
                loc = np.dot(self.w['mog_mean'+str(i)].get_value(), A)
                scale = np.dot(np.exp(.5*self.w['mog_logvar'+str(i)].get_value()), A)
                z['z'] = np.random.normal(loc=loc, scale=scale).astype(np.float32)
            else:
                raise Exception('Unknown type_pz')
        # Generate from p(x|z)
        
        if self.type_px == 'bernoulli':
            p = self.dist_px['x'](*([z['z']] + [A]))
            _z['x'] = p
            if 'x' not in x:
                x['x'] = np.random.binomial(n=1,p=p)
        elif self.type_px == 'bounded01' or self.type_px == 'gaussian':
            x_mean, x_logvar = self.dist_px['x'](*([z['z']] + [A]))
            _z['x'] = x_mean
            if 'x' not in x:
                x['x'] = np.random.normal(x_mean, np.exp(x_logvar/2))
                if self.type_px == 'bounded01':
                    x['x'] = np.maximum(np.zeros(x['x'].shape), x['x'])
                    x['x'] = np.minimum(np.ones(x['x'].shape), x['x'])
        
        else: raise Exception("")
        
        return x, z, _z
Example #12
    def gen_xz(self, x, z, n_batch):
        
        x, z = ndict.ordereddicts((x, z))
        
        A = np.ones((1, n_batch)).astype(np.float32)
        for i in z: z[i] = z[i].astype(np.float32)
        for i in x: x[i] = x[i].astype(np.float32)
        
        _z = {}

        # If x['x'] was given but not z['z']: generate z ~ q(z|x)
        if x.has_key('x') and not z.has_key('z'):

            q_mean, q_logvar = self.dist_qz['z'](*([x['x']] + [A]))
            _z['mean'] = q_mean
            _z['logvar'] = q_logvar
            
            # Require epsilon: reuse z['eps'] if the caller supplied it,
            # otherwise sample fresh noise
            eps = z['eps'] if z.has_key('eps') else self.gen_eps(n_batch)['eps']

            z['z'] = q_mean + np.exp(0.5 * q_logvar) * eps
            
        elif not z.has_key('z'):
            if self.type_pz in ['gaussian','gaussianmarg']:
                z['z'] = np.random.standard_normal(size=(self.n_z, n_batch)).astype(np.float32)
            elif self.type_pz == 'laplace':
                z['z'] = np.random.laplace(size=(self.n_z, n_batch)).astype(np.float32)
            elif self.type_pz == 'studentt':
                z['z'] = np.random.standard_t(np.dot(np.exp(self.w['logv'].get_value()), A)).astype(np.float32)
            elif self.type_pz == 'mog':
                i = np.random.randint(self.n_mixture)
                loc = np.dot(self.w['mog_mean'+str(i)].get_value(), A)
                scale = np.dot(np.exp(.5*self.w['mog_logvar'+str(i)].get_value()), A)
                z['z'] = np.random.normal(loc=loc, scale=scale).astype(np.float32)
            else:
                raise Exception('Unknown type_pz')
        # Generate from p(x|z)
        
        if self.type_px == 'bernoulli':
            p = self.dist_px['x'](*([z['z']] + [A]))
            _z['x'] = p
            if not x.has_key('x'):
                x['x'] = np.random.binomial(n=1,p=p)
        elif self.type_px == 'bounded01' or self.type_px == 'gaussian':
            x_mean, x_logvar = self.dist_px['x'](*([z['z']] + [A]))
            _z['x'] = x_mean
            if not x.has_key('x'):
                x['x'] = np.random.normal(x_mean, np.exp(x_logvar/2))
                if self.type_px == 'bounded01':
                    x['x'] = np.maximum(np.zeros(x['x'].shape), x['x'])
                    x['x'] = np.minimum(np.ones(x['x'].shape), x['x'])
        
        else: raise Exception("")
        
        return x, z, _z
Example #13
 def dL_dw(self, v, w, x, z):
     x, z = self.xz_to_theano(x, z)
     v, w, z, x = ndict.ordereddicts((v, w, z, x))
     A = self.get_A(x)
     allvars = list(v.values()) + list(w.values()) + list(x.values()) + list(z.values()) + [A]
     r = self.f_dL_dw(*allvars)
     logpx, logpz, logqz, gv, gw = r[0], r[1], r[2], dict(zip(v.keys(), r[3:3+len(v)])), dict(zip(w.keys(), r[3+len(v):3+len(v)+len(w)]))
     self.checknan(v, w, gv, gw)
     gv, gw = self.gw_to_numpy(gv, gw)
     return logpx, logpz, logqz, gv, gw
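The dict(zip(...)) slicing rebuilds name-to-gradient maps from the flat list r returned by the compiled function: three scalars first, then one gradient per key of v, then one per key of w. For instance:

    # If v has keys ['v0', 'v1'] and w has keys ['w0'], then
    #     r = [logpx, logpz, logqz, g_v0, g_v1, g_w0]
    # and the slices recover
    #     gv = {'v0': r[3], 'v1': r[4]}
    #     gw = {'w0': r[5]}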
Example #14
 def dL_dw(self, v, w, x, z):
     x, z = self.xz_to_theano(x, z)
     v, w, z, x = ndict.ordereddicts((v, w, z, x))
     A = self.get_A(x)
     allvars = v.values() + w.values() + x.values() + z.values() + [A]
     r = self.f_dL_dw(*allvars)
     logpx, logpz, logqz, gv, gw = r[0], r[1], r[2], dict(zip(v.keys(), r[3:3+len(v)])), dict(zip(w.keys(), r[3+len(v):3+len(v)+len(w)]))
     self.checknan(v, w, gv, gw)        
     gv, gw = self.gw_to_numpy(gv, gw)
     return logpx, logpz, logqz, gv, gw
Example #15
 def dL_weighted_dw(self, v, w, x, z, weights):
     x, z = self.xz_to_theano(x, z)
     v, w, z, x = ndict.ordereddicts((v, w, z, x))
     A = self.get_A(x)
     allvars = list(v.values()) + list(w.values()) + list(x.values()) + list(z.values()) + [A]
     r = self.f_dL_weighted_dw(*(allvars+[weights]))
     L_unweighted, L_weighted, gv, gw = r[0], r[1], dict(zip(v.keys(), r[2:2+len(v)])), dict(zip(w.keys(), r[2+len(v):2+len(v)+len(w)]))
     self.checknan(v, w, gv, gw)
     gv, gw = self.gw_to_numpy(gv, gw)
     return L_unweighted, L_weighted, gv, gw
Example #16
 def dL_weighted_dw(self, v, w, x, z, weights):
     x, z = self.xz_to_theano(x, z)
     v, w, z, x = ndict.ordereddicts((v, w, z, x))
     A = self.get_A(x)
     allvars = v.values() + w.values() + x.values() + z.values() + [A]
     r = self.f_dL_weighted_dw(*(allvars+[weights]))
     L_unweighted, L_weighted, gv, gw = r[0], r[1], dict(zip(v.keys(), r[2:2+len(v)])), dict(zip(w.keys(), r[2+len(v):2+len(v)+len(w)]))
     self.checknan(v, w, gv, gw)
     gv, gw = self.gw_to_numpy(gv, gw)
     return L_unweighted, L_weighted, gv, gw
Example #17
    def __init__(self, get_optimizer, theano_warning='raise'):

        v = self.v
        w = self.w
        theanofunction = lazytheanofunc('warn', mode='FAST_RUN')
        theanofunction_silent = lazytheanofunc('ignore', mode='FAST_RUN')

        # Create theano expressions
        x, z = ndict.ordereddicts(self.variables())
        self.var_x, self.var_z, = x, z

        # Helper variables
        A = T.fmatrix('A')
        self.var_A = A

        # Get gradient symbols
        allvars = list(x.values()) + list(
            z.values()) + [A]  # note: '+' concatenates lists

        # TODO: more beautiful/standardized way of setting distributions
        # (should be even simpler than this)
        self.dist_qz = {}
        self.dist_px = {}
        self.dist_pz = {}

        logpx, logpz, logqz = self.factors(x, z, A)

        if get_optimizer == None:

            def get_optimizer(w, g):
                from collections import OrderedDict
                updates = OrderedDict()
                for i in w:
                    updates[w[i]] = w[i]
                return updates

        # Log-likelihood lower bound
        self.f_L = theanofunction(allvars, [logpx, logpz, logqz])
        L = (logpx + logpz - logqz).sum()
        g = T.grad(L, list(v.values()) + list(w.values()))
        gv, gw = dict(list(zip(list(v.keys()), g[0:len(v)]))), dict(
            list(zip(list(w.keys()), g[len(v):len(v) + len(w)])))
        updates = get_optimizer(v, gv)
        updates.update(get_optimizer(w, gw))

        #self.profmode = theano.ProfileMode(optimizer='fast_run', linker=theano.gof.OpWiseCLinker())
        #self.f_evalAndUpdate = theano.function(allvars, [logpx + logpz - logqz], updates=updates_w, mode=self.profmode)
        #theano.printing.debugprint(self.f_evalAndUpdate)

        self.f_eval = theanofunction(allvars, [logpx + logpz - logqz])
        self.f_evalAndUpdate = theanofunction(allvars, [logpx + logpz - logqz],
                                              updates=updates)
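When get_optimizer is None, the fallback above returns identity updates, so f_evalAndUpdate performs no learning. A minimal SGD-style optimizer with the same (params, grads) -> updates contract might look like the sketch below; the learning rate is illustrative, and the step is gradient ascent since L is a lower bound being maximized:

    from collections import OrderedDict

    def sgd_optimizer(w, g, learning_rate=1e-3):
        # Same contract as get_optimizer(w, g): map each Theano shared
        # variable to its symbolic updated value.
        updates = OrderedDict()
        for i in w:
            updates[w[i]] = w[i] + learning_rate * g[i]
        return updates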
Example #18
 def dL2_dw(self, v, w, x, z):
     ''' gradient for when only 'x' is given (and not 'y') '''
     x, z = self.xz_to_theano(x, z)
     v, w, z, x = ndict.ordereddicts((v, w, z, x))
     A = self.get_A(x)
     allvars = v.values() + w.values() + x.values() + z.values() + [A]
     r = self.f_dL2_dw(*allvars)
     logpx, logpz, logqz, gv, gw = r[0], r[1], r[2], dict(
         zip(v.keys(), r[3:3 + len(v)])), dict(
             zip(w.keys(), r[3 + len(v):3 + len(v) + len(w)]))
     self.checknan(v, w, gv, gw)
     gv, gw = self.gw_to_numpy(gv, gw)
     return logpx, logpz, logqz, gv, gw
Example #19
    def gen_xz(self, v, w, x, z, n_batch):

        v, w, x, z = ndict.ordereddicts((v, w, x, z))

        A = np.ones((1, n_batch))

        _z = {}

        # If x['x'] was given but not z['z']: generate z ~ q(z|x)
        if x.has_key('x') and not z.has_key('z'):

            q_mean, q_logvar = self.dist_qz['z'](*([x['x']] + v.values() +
                                                   [A]))
            _z['mean'] = q_mean
            _z['logvar'] = q_logvar

            # Require epsilon
            if not z.has_key('eps'):
                z['eps'] = self.gen_eps(n_batch)['eps']

            z['z'] = q_mean + np.exp(0.5 * q_logvar) * z['eps']

        elif not z.has_key('z'):
            if self.type_pz in ['gaussian', 'gaussianmarg']:
                z['z'] = np.random.standard_normal(size=(self.n_z, n_batch))
            elif self.type_pz == 'laplace':
                z['z'] = np.random.laplace(size=(self.n_z, n_batch))
            elif self.type_pz == 'studentt':
                z['z'] = np.random.standard_t(np.dot(np.exp(w['logv']), A))

        # Generate from p(x|z)

        if self.type_px == 'bernoulli':
            p = self.dist_px['x'](*([z['z']] + w.values() + [A]))
            _z['x'] = p
            if not x.has_key('x'):
                x['x'] = np.random.binomial(n=1, p=p)
        elif self.type_px == 'bounded01' or self.type_px == 'gaussian':
            x_mean, x_logvar = self.dist_px['x'](*([z['z']] + w.values() +
                                                   [A]))
            _z['x'] = x_mean
            if not x.has_key('x'):
                x['x'] = np.random.normal(x_mean, np.exp(x_logvar / 2))
                if self.type_px == 'bounded01':
                    x['x'] = np.maximum(np.zeros(x['x'].shape), x['x'])
                    x['x'] = np.minimum(np.ones(x['x'].shape), x['x'])

        else:
            raise Exception("")

        return x, z, _z
Example #20
    def __init__(self, get_optimizer, theano_warning="raise"):

        v = self.v
        w = self.w
        theanofunction = lazytheanofunc("warn", mode="FAST_RUN")
        theanofunction_silent = lazytheanofunc("ignore", mode="FAST_RUN")

        # Create theano expressions
        x, z = ndict.ordereddicts(self.variables())
        self.var_x, self.var_z, = x, z

        # Helper variables
        A = T.fmatrix("A")
        self.var_A = A

        # Get gradient symbols
        allvars = x.values() + z.values() + [A]  # note: '+' concatenates lists

        # TODO: more beautiful/standardized way of setting distributions
        # (should be even simpler than this)
        self.dist_qz = {}
        self.dist_px = {}
        self.dist_pz = {}

        logpx, logpz, logqz = self.factors(x, z, A)

        if get_optimizer == None:

            def get_optimizer(w, g):
                from collections import OrderedDict

                updates = OrderedDict()
                for i in w:
                    updates[w[i]] = w[i]
                return updates

        # Log-likelihood lower bound
        self.f_L = theanofunction(allvars, [logpx, logpz, logqz])
        L = (logpx + logpz - logqz).sum()
        g = T.grad(L, v.values() + w.values())
        gv, gw = dict(zip(v.keys(), g[0 : len(v)])), dict(zip(w.keys(), g[len(v) : len(v) + len(w)]))
        updates = get_optimizer(v, gv)
        updates.update(get_optimizer(w, gw))

        # self.profmode = theano.ProfileMode(optimizer='fast_run', linker=theano.gof.OpWiseCLinker())
        # self.f_evalAndUpdate = theano.function(allvars, [logpx + logpz - logqz], updates=updates_w, mode=self.profmode)
        # theano.printing.debugprint(self.f_evalAndUpdate)

        self.f_eval = theanofunction(allvars, [logpx + logpz - logqz])
        self.f_evalAndUpdate = theanofunction(allvars, [logpx + logpz - logqz], updates=updates)
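Reading logpx, logpz and logqz as log p(x|z), log p(z) and log q(z|x), the quantity compiled into f_eval and f_evalAndUpdate in both constructor variants is the variational lower bound summed over the minibatch:

    \mathcal{L} = \sum_i \big[ \log p(x_i \mid z_i) + \log p(z_i) - \log q(z_i \mid x_i) \big]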
Example #21
    def logpxz(self, w, x, z):
        x, z = self.xz_to_theano(x, z)
        w, z, x = ndict.ordereddicts((w, z, x))
        A = self.get_A(x)
        allvars = list(w.values()) + list(x.values()) + list(z.values()) + [A]
        logpx, logpz = self.f_logpxz(*allvars)
        if np.isnan(logpx).any() or np.isnan(logpz).any():
            print('v: ', logpx, logpz)
            print('Values:')
            ndict.p(w)
            ndict.p(z)
            raise Exception("dlogpxz_dwz(): NaN found in gradients")

        return logpx, logpz
Example #22
 def logpxz(self, w, x, z):
     x, z = self.xz_to_theano(x, z)
     w, z, x = ndict.ordereddicts((w, z, x))
     A = self.get_A(x)
     allvars = w.values() + x.values() + z.values() + [A]
     logpx, logpz = self.f_logpxz(*allvars)
     if np.isnan(logpx).any() or np.isnan(logpz).any():
         print 'v: ', logpx, logpz
         print 'Values:'
         ndict.p(w)
         ndict.p(z)
         raise Exception("dlogpxz_dwz(): NaN found in gradients")
     
     return logpx, logpz
Example #23
    def __init__(self, theano_warning='raise'):

        theanofunction = lazytheanofunc('warn', mode='FAST_RUN')
        theanofunction_silent = lazytheanofunc('ignore', mode='FAST_RUN')

        # Create theano expressions
        v, w, x, z = ndict.ordereddicts(self.variables())
        self.var_v, self.var_w, self.var_x, self.var_z, = v, w, x, z

        # Helper variables
        A = T.dmatrix('A')
        self.var_A = A

        # Get gradient symbols
        allvars = list(v.values()) + list(w.values()) + list(
            x.values()) + list(z.values()) + [A]  # note: '+' concatenates lists

        # TODO: more beautiful/standardized way of setting distributions
        # (should be even simpler than this)
        self.dist_qz = {}
        self.dist_px = {}
        self.dist_pz = {}

        logpv, logpw, logpx, logpz, logqz = self.factors(v, w, x, z, A)

        # Log-likelihood lower bound
        self.f_L = theanofunction(allvars, [logpx, logpz, logqz])
        L = (logpx + logpz - logqz).sum()
        dL_dw = T.grad(L, list(v.values()) + list(w.values()))
        self.f_dL_dw = theanofunction(allvars, [logpx, logpz, logqz] + dL_dw)

        weights = T.dmatrix()
        dL_weighted_dw = T.grad((weights * (logpx + logpz - logqz)).sum(),
                                list(v.values()) + list(w.values()))
        self.f_dL_weighted_dw = theanofunction(
            allvars + [weights],
            [logpx + logpz - logqz, weights *
             (logpx + logpz - logqz)] + dL_weighted_dw)

        # prior
        dlogpw_dw = T.grad(logpv + logpw,
                           list(v.values()) + list(w.values()),
                           disconnected_inputs='ignore')
        self.f_logpw = theanofunction(
            list(v.values()) + list(w.values()), [logpv, logpw])
        self.f_dlogpw_dw = theanofunction(
            list(v.values()) + list(w.values()), [logpv, logpw] + dlogpw_dw)
Example #24
    def L(self, v, w, x, z):
        x, z = self.xz_to_theano(x, z)
        v, w, z, x = ndict.ordereddicts((v, w, z, x))
        A = self.get_A(x)
        allvars = list(v.values()) + list(w.values()) + list(x.values()) + list(z.values()) + [A]
        logpx, logpz, logqz = self.f_L(*allvars)

        if np.isnan(logpx).any() or np.isnan(logpz).any() or np.isnan(logqz).any():
            print('logp: ', logpx, logpz, logqz)
            print('Values:')
            ndict.p(v)
            ndict.p(w)
            ndict.p(x)
            ndict.p(z)
            raise Exception("delbo_dwz(): NaN found in gradients")

        return logpx, logpz, logqz
Example #25
 def L(self, v, w, x, z):
     x, z = self.xz_to_theano(x, z)
     v, w, z, x = ndict.ordereddicts((v, w, z, x))
     A = self.get_A(x)
     allvars = v.values() + w.values() + x.values() + z.values() + [A]
     logpx, logpz, logqz = self.f_L(*allvars)
     
     if np.isnan(logpx).any() or np.isnan(logpz).any() or np.isnan(logqz).any():
         print 'logp: ', logpx, logpz, logqz
         print 'Values:'
         ndict.p(v)
         ndict.p(w)
         ndict.p(x)
         ndict.p(z)
         raise Exception("delbo_dwz(): NaN found in gradients")
     
     return logpx, logpz, logqz
Example #26
 def gen_xz_prior11(self, x, z, mean_prior, sigma_square, n_batch):
     
     x, z = ndict.ordereddicts((x, z))
     A = np.ones((1, n_batch)).astype(np.float32)
     z['z'] = mean_prior.astype(np.float32)
     
     if self.type_px == 'bernoulli':
         x['x'] = self.dist_px['x'](*([z['z']] + [A]))
     elif self.type_px == 'bounded01' or self.type_px == 'gaussian':
         x_mean, x_logvar = self.dist_px['x'](*([z['z']] + [A]))
         if not x.has_key('x'):
             x['x'] = np.random.normal(x_mean, np.exp(x_logvar/2))
             if self.type_px == 'bounded01':
                 x['x'] = np.maximum(np.zeros(x['x'].shape), x['x'])
                 x['x'] = np.minimum(np.ones(x['x'].shape), x['x'])
                 
     else: raise Exception("")
     
     return x
Example #27
    def dlogpxz_dwz(self, w, x, z):

        x, z = self.xz_to_theano(x, z)
        w, z, x = ndict.ordereddicts((w, z, x))
        A = self.get_A(x)
        allvars = list(w.values()) + list(x.values()) + list(z.values()) + [A]

        # Check if keys are correct
        keys = list(w.keys()) + list(x.keys()) + list(z.keys()) + ['A']
        for i in range(len(keys)):
            if keys[i] != self.allvars_keys[i]:
                "Input values are incorrect!"
                print('Input:', keys)
                print('Should be:', self.allvars_keys)
                raise Exception()

        r = self.f_dlogpxz_dwz(*allvars)
        logpx, logpz, gw, gz = r[0], r[1], dict(
            list(zip(list(w.keys()), r[2:2 + len(w)]))), dict(
                list(zip(list(z.keys()), r[2 + len(w):])))

        if ndict.hasNaN(gw) or ndict.hasNaN(gz):
            if True:
                print('NaN detected in gradients')
                raise Exception()
                for i in gw:
                    gw[i][np.isnan(gw[i])] = 0
                for i in gz:
                    gz[i][np.isnan(gz[i])] = 0
            else:
                print('logpx: ', logpx)
                print('logpz: ', logpz)
                print('Values:')
                ndict.p(w)
                ndict.p(z)
                print('Gradients:')
                ndict.p(gw)
                ndict.p(gz)
                raise Exception("dlogpxz_dwz(): NaN found in gradients")

        gw, gz = self.gwgz_to_numpy(gw, gz)
        return logpx, logpz, gw, gz
Example #28
 def dlogpxz_dwz(self, w, x, z):
     
     x, z = self.xz_to_theano(x, z)
     w, z, x = ndict.ordereddicts((w, z, x))
     A = self.get_A(x)
     allvars = w.values() + x.values() + z.values() + [A]
     
     # Check if keys are correct
     keys = w.keys() + x.keys() + z.keys() + ['A']
     for i in range(len(keys)):
         if keys[i] != self.allvars_keys[i]:
             "Input values are incorrect!"
             print 'Input:', keys
             print 'Should be:', self.allvars_keys
             raise Exception()
         
     r = self.f_dlogpxz_dwz(*allvars)
     logpx, logpz, gw, gz = r[0], r[1], dict(zip(w.keys(), r[2:2+len(w)])), dict(zip(z.keys(), r[2+len(w):]))
     
     if ndict.hasNaN(gw) or ndict.hasNaN(gz):
         if True:
             print 'NaN detected in gradients'
             raise Exception()
             for i in gw: gw[i][np.isnan(gw[i])] = 0
             for i in gz: gz[i][np.isnan(gz[i])] = 0
         else:
             print 'logpx: ', logpx
             print 'logpz: ', logpz
             print 'Values:'
             ndict.p(w)
             ndict.p(z)
             print 'Gradients:'
             ndict.p(gw)
             ndict.p(gz)
             raise Exception("dlogpxz_dwz(): NaN found in gradients")
     
     gw, gz = self.gwgz_to_numpy(gw, gz)
     return logpx, logpz, gw, gz
Example #29
 def __init__(self, theano_warning='raise'):
     
     theanofunction = lazytheanofunc('warn', mode='FAST_RUN')
     theanofunction_silent = lazytheanofunc('ignore', mode='FAST_RUN')
     
     # Create theano expressions
     v, w, x, z = ndict.ordereddicts(self.variables())
     self.var_v, self.var_w, self.var_x, self.var_z, = v, w, x, z
     
     # Helper variables
     A = T.dmatrix('A')
     self.var_A = A
     
     # Get gradient symbols
     allvars = v.values() + w.values() + x.values() + z.values() + [A] # note: '+' concatenates lists
     
     # TODO: more beautiful/standardized way of setting distributions
     # (should be even simpler than this) 
     self.dist_qz = {}
     self.dist_px = {}
     self.dist_pz = {}
     
     logpv, logpw, logpx, logpz, logqz = self.factors(v, w, x, z, A)
     
     # Log-likelihood lower bound
     self.f_L = theanofunction(allvars, [logpx, logpz, logqz])
     L = (logpx + logpz - logqz).sum()
     dL_dw = T.grad(L, v.values() + w.values())
     self.f_dL_dw = theanofunction(allvars, [logpx, logpz, logqz] + dL_dw)
     
     weights = T.dmatrix()
     dL_weighted_dw = T.grad((weights * (logpx + logpz - logqz)).sum(), v.values() + w.values())
     self.f_dL_weighted_dw = theanofunction(allvars + [weights], [logpx + logpz - logqz, weights*(logpx + logpz - logqz)] + dL_weighted_dw)
     
     # prior
     dlogpw_dw = T.grad(logpv + logpw, v.values() + w.values(), disconnected_inputs='ignore')
     self.f_logpw = theanofunction(v.values() + w.values(), [logpv, logpw])
     self.f_dlogpw_dw = theanofunction(v.values() + w.values(), [logpv, logpw] + dlogpw_dw)
Example #30
 def dfd_dw(self, w, x, z, gz2):
     x, z = self.xz_to_theano(x, z)
     w, z, x, gz2 = ndict.ordereddicts((w, z, x, gz2))
     A = self.get_A(x)
     r = self.f_dfd_dw(*(w.values() + x.values() + z.values() + [A] + gz2.values()))
     logpx, logpz, fd, gw = r[0], r[1], r[2], dict(zip(w.keys(), r[3:3+len(w)]))
     
     if ndict.hasNaN(gw):
         if True:
             print 'NaN detected in gradients'
             raise Exception()
             for i in gw: gw[i][np.isnan(gw[i])] = 0
         else:
             
             print 'fd: ', fd
             print 'Values:'
             ndict.p(w)
             ndict.p(z)
             print 'Gradients:'
             ndict.p(gw)
             raise Exception("dfd_dw(): NaN found in gradients")
     
     gw, _ = self.gwgz_to_numpy(gw, {})
     return logpx, logpz, fd, gw
Example #31
    def __init__(self, get_optimizer, theano_warning='raise'):

        v = self.v
        w = self.w
        theanofunction = lazytheanofunc('warn', mode='FAST_RUN')
        theanofunction_silent = lazytheanofunc('ignore', mode='FAST_RUN')

        # Create theano expressions
        x, z = ndict.ordereddicts(self.variables())
        self.var_x, self.var_z, = x, z

        # Helper variables
        A = T.fmatrix('A')
        self.var_A = A
        '''
        # Get gradient symbols
        print 'model, x'
        for (d, xx) in x.items():
          print d
          print xx.shape
          
        print x.values()
        '''

        allvars = x.values() + z.values() + [A]  # note: '+' concatenates lists

        # TODO: more beautiful/standardized way of setting distributions
        # (should be even simpler than this)
        self.dist_qz = {}
        self.dist_px = {}
        self.dist_pz = {}

        factors = self.factors(x, z, A)
        if len(factors) == 3:
            (logpx, logpz, logqz) = factors
            cost = 0
            sparsity_penalty = 0
        else:
            (logpx, logpz, logqz, cost, sparsity_penalty) = factors

        if get_optimizer == None:

            def get_optimizer(w, g):
                from collections import OrderedDict
                updates = OrderedDict()
                for i in w:
                    updates[w[i]] = w[i]
                return updates

        # Log-likelihood lower bound
        self.f_L = theanofunction(
            allvars, [logpx, logpz, logqz, cost, sparsity_penalty])
        L = (logpx + logpz - logqz).sum() - cost - sparsity_penalty

        g = T.grad(L, v.values() + w.values())
        gv, gw = dict(zip(v.keys(), g[0:len(v)])), dict(
            zip(w.keys(), g[len(v):len(v) + len(w)]))
        updates = get_optimizer(v, gv)
        updates.update(get_optimizer(w, gw))

        #self.profmode = theano.ProfileMode(optimizer='fast_run', linker=theano.gof.OpWiseCLinker())
        #self.f_evalAndUpdate = theano.function(allvars, [logpx + logpz - logqz], updates=updates_w, mode=self.profmode)
        #theano.printing.debugprint(self.f_evalAndUpdate)

        self.f_eval_test = theanofunction(
            allvars, [logpx + logpz - logqz, logpx, logpz, -logqz])
        self.f_eval = theanofunction(allvars, [logpx + logpz - logqz])
        self.f_evalAndUpdate = theanofunction(allvars, [logpx + logpz - logqz],
                                              updates=updates)
        self.f_eval_for_classcondition_prior = theanofunction(
            allvars, [logpx - logqz])
Example #32
    def gen_xz(self, x, z, n_batch):

        x, z = ndict.ordereddicts((x, z))

        A = np.ones((1, n_batch)).astype(np.float32)

        for i in z:
            z[i] = z[i].astype(np.float32)
        for i in x:
            x[i] = x[i].astype(np.float32)

        _z = {}

        # If x['x'] was given but not z['z']: generate z ~ q(z|x)
        if x.has_key('x') and not z.has_key('z'):
            '''
            print x['x'].shape
            print x['mean_prior'].shape
            print A.shape
            '''
            q_mean, q_logvar = self.dist_qz['z'](*([x['x'], x['mean_prior']] +
                                                   [A]))
            q_hidden = self.dist_qz['hidden'](*([x['x'], x['mean_prior']] +
                                                [A]))

            _z['mean'] = q_mean
            _z['logvar'] = q_logvar
            _z['hidden'] = q_hidden

            # Require epsilon: reuse z['eps'] if the caller supplied it,
            # otherwise sample fresh noise
            eps = z['eps'] if z.has_key('eps') else self.gen_eps(n_batch)['eps']

            z['z'] = q_mean + np.exp(0.5 * q_logvar) * eps

        elif not z.has_key('z'):
            if self.type_pz in ['gaussian', 'gaussianmarg']:
                z['z'] = np.random.standard_normal(size=(self.n_z,
                                                         n_batch)).astype(
                                                             np.float32)
            elif self.type_pz == 'laplace':
                z['z'] = np.random.laplace(size=(self.n_z,
                                                 n_batch)).astype(np.float32)
            elif self.type_pz == 'studentt':
                z['z'] = np.random.standard_t(
                    np.dot(np.exp(self.w['logv'].get_value()),
                           A)).astype(np.float32)
            elif self.type_pz == 'mog':
                i = np.random.randint(self.n_mixture)
                loc = np.dot(self.w['mog_mean' + str(i)].get_value(), A)
                scale = np.dot(
                    np.exp(.5 * self.w['mog_logvar' + str(i)].get_value()), A)
                z['z'] = np.random.normal(loc=loc,
                                          scale=scale).astype(np.float32)
            else:
                raise Exception('Unknown type_pz')
        # Generate from p(x|z)

        if self.type_px == 'bernoulli':
            #print 'xz p'
            p = self.dist_px['x'](*([z['z']] + [A]))
            _z['x'] = p
            if not x.has_key('x'):
                #print 'xz ber'
                x['x'] = np.random.binomial(n=1, p=p)
        elif self.type_px == 'bounded01' or self.type_px == 'gaussian':
            x_mean, x_logvar = self.dist_px['x'](*([z['z']] + [A]))
            _z['x'] = x_mean
            if not x.has_key('x'):
                x['x'] = np.random.normal(x_mean, np.exp(x_logvar / 2))
                if self.type_px == 'bounded01':
                    x['x'] = np.maximum(np.zeros(x['x'].shape), x['x'])
                    x['x'] = np.minimum(np.ones(x['x'].shape), x['x'])
        elif self.type_px == 'exponential':
            x_mean = self.dist_px['x'](*([z['z']] + [A]))
            _z['x'] = x_mean
            if not x.has_key('x'):
                x['x'] = np.random.exponential(x_mean)
        elif self.type_px == 'mixture':
            x_mean, x_logvar = self.dist_px['x'](*([z['z']] + [A]))
            _z['x'] = x_mean
            if not x.has_key('x'):
                normal_list = np.asarray([1, 6, 10, 14, 18])
                exponential_list = np.asarray(
                    [0, 3, 5, 9, 13, 17, 21, 22, 23, 24, 25, 26, 27])
                uniform_list = np.asarray([2, 4, 7, 11, 15, 19])
                threemodal_list = np.asarray([8, 12, 16, 20])
                # Sample into a separate array; assigning to `x` itself
                # would shadow the dict that gen_xz returns
                x_sample = np.zeros(x_mean.shape)
                x_sample[exponential_list, :] = np.random.exponential(
                    x_mean[exponential_list, :])
                x_sample[normal_list, :] = np.random.normal(
                    x_mean[normal_list, :],
                    np.exp(x_logvar[normal_list, :] / 2))
                x_sample[uniform_list, :] = np.random.sample(
                    x_sample[uniform_list, :].shape) * 3.5 - 1.75
                #x_sample[threemodal_list,:] =
                x['x'] = x_sample

        else:
            raise Exception("")

        return x, z, _z
Example #33
    def __init__(self, get_optimizer, theano_warning='raise'):
        
        v = self.v
        w = self.w
        theanofunction = lazytheanofunc('warn', mode='FAST_RUN')
        theanofunction_silent = lazytheanofunc('ignore', mode='FAST_RUN')
        
        # Create theano expressions
        x, z = ndict.ordereddicts(self.variables())
        self.var_x, self.var_z, = x, z
        
        # Helper variables
        A = T.fmatrix('A')
        self.var_A = A
        
        '''
        # Get gradient symbols
        print 'model, x'
        for (d, xx) in x.items():
          print d
          print xx.shape
          
        print x.values()
        '''
        
        allvars = x.values() + z.values() + [A] # note: '+' concatenates lists
        
        # TODO: more beautiful/standardized way of setting distributions
        # (should be even simpler than this) 
        self.dist_qz = {}
        self.dist_px = {}
        self.dist_pz = {}
        
        factors = self.factors(x, z, A)
        if len(factors) == 3:
            (logpx, logpz, logqz) = factors
            cost = 0
            sparsity_penalty = 0
        else:
            (logpx, logpz, logqz, cost, sparsity_penalty) = factors

        if get_optimizer == None:
            def get_optimizer(w, g):
                from collections import OrderedDict
                updates = OrderedDict()
                for i in w: updates[w[i]] = w[i]
                return updates

        # Log-likelihood lower bound
        self.f_L = theanofunction(allvars, [logpx, logpz, logqz, cost, sparsity_penalty])
        L = (logpx + logpz - logqz).sum() - cost - sparsity_penalty

        g = T.grad(L, v.values() + w.values())
        gv, gw = dict(zip(v.keys(), g[0:len(v)])), dict(zip(w.keys(), g[len(v):len(v)+len(w)]))
        updates = get_optimizer(v, gv)
        updates.update(get_optimizer(w, gw))
        
        #self.profmode = theano.ProfileMode(optimizer='fast_run', linker=theano.gof.OpWiseCLinker())
        #self.f_evalAndUpdate = theano.function(allvars, [logpx + logpz - logqz], updates=updates_w, mode=self.profmode)
        #theano.printing.debugprint(self.f_evalAndUpdate)
        
        self.f_eval_test = theanofunction(allvars, [logpx + logpz - logqz, logpx, logpz, -logqz])
        self.f_eval = theanofunction(allvars, [logpx + logpz - logqz])
        self.f_evalAndUpdate = theanofunction(allvars, [logpx + logpz - logqz], updates=updates)
        self.f_eval_for_classcondition_prior = theanofunction(allvars, [logpx - logqz])
Example #34
 def distribution(self, v, w, x, z, name):
     x, z = self.xz_to_theano(x, z)
     v, w, x, z = ndict.ordereddicts((v, w, x, z))
     A = self.get_A(x)
     allvars = v.values() + w.values() + x.values() + z.values() + [A]
     return self.f_dists[name](*allvars)
Example #35
    def gen_xz(self, x, z, n_batch):
        
        x, z = ndict.ordereddicts((x, z))
        
        A = np.ones((1, n_batch)).astype(np.float32)
        
        for i in z: z[i] = z[i].astype(np.float32)
        for i in x: x[i] = x[i].astype(np.float32)
        
        _z = {}

        # If x['x'] was given but not z['z']: generate z ~ q(z|x)
        if x.has_key('x') and not z.has_key('z'):
            '''
            print x['x'].shape
            print x['mean_prior'].shape
            print A.shape
            '''
            q_mean, q_logvar = self.dist_qz['z'](*([x['x'], x['mean_prior']] + [A]))
            q_hidden = self.dist_qz['hidden'](*([x['x'], x['mean_prior']] + [A]))

            _z['mean'] = q_mean
            _z['logvar'] = q_logvar
            _z['hidden'] = q_hidden
            
            # Require epsilon: reuse z['eps'] if the caller supplied it,
            # otherwise sample fresh noise
            eps = z['eps'] if z.has_key('eps') else self.gen_eps(n_batch)['eps']

            z['z'] = q_mean + np.exp(0.5 * q_logvar) * eps
            
        elif not z.has_key('z'):
            if self.type_pz in ['gaussian','gaussianmarg']:
                z['z'] = np.random.standard_normal(size=(self.n_z, n_batch)).astype(np.float32)
            elif self.type_pz == 'laplace':
                z['z'] = np.random.laplace(size=(self.n_z, n_batch)).astype(np.float32)
            elif self.type_pz == 'studentt':
                z['z'] = np.random.standard_t(np.dot(np.exp(self.w['logv'].get_value()), A)).astype(np.float32)
            elif self.type_pz == 'mog':
                i = np.random.randint(self.n_mixture)
                loc = np.dot(self.w['mog_mean'+str(i)].get_value(), A)
                scale = np.dot(np.exp(.5*self.w['mog_logvar'+str(i)].get_value()), A)
                z['z'] = np.random.normal(loc=loc, scale=scale).astype(np.float32)
            else:
                raise Exception('Unknown type_pz')
        # Generate from p(x|z)
        
        if self.type_px == 'bernoulli':
            #print 'xz p'
            p = self.dist_px['x'](*([z['z']] + [A]))
            _z['x'] = p
            if not x.has_key('x'):
                #print 'xz ber'
                x['x'] = np.random.binomial(n=1,p=p)
        elif self.type_px == 'bounded01' or self.type_px == 'gaussian':
            x_mean, x_logvar = self.dist_px['x'](*([z['z']] + [A]))
            _z['x'] = x_mean
            if not x.has_key('x'):
                x['x'] = np.random.normal(x_mean, np.exp(x_logvar/2))
                if self.type_px == 'bounded01':
                    x['x'] = np.maximum(np.zeros(x['x'].shape), x['x'])
                    x['x'] = np.minimum(np.ones(x['x'].shape), x['x'])
        elif self.type_px == 'exponential':
            x_mean = self.dist_px['x'](*([z['z']] + [A]))
            _z['x'] = x_mean
            if not x.has_key('x'):
                x['x'] = np.random.exponential(x_mean)
        elif self.type_px == 'mixture':
            x_mean, x_logvar = self.dist_px['x'](*([z['z']] + [A]))
            _z['x'] = x_mean
            if not x.has_key('x'):
                normal_list = np.asarray([1,6,10,14,18])
                exponential_list = np.asarray([0,3,5,9,13,17,21,22,23,24,25,26,27])
                uniform_list = np.asarray([2,4,7,11,15,19])
                threemodal_list = np.asarray([8,12,16,20])
                # Sample into a separate array; assigning to `x` itself
                # would shadow the dict that gen_xz returns
                x_sample = np.zeros(x_mean.shape)
                x_sample[exponential_list,:] = np.random.exponential(x_mean[exponential_list,:])
                x_sample[normal_list,:] = np.random.normal(x_mean[normal_list, :], np.exp(x_logvar[normal_list, :]/2))
                x_sample[uniform_list,:] = np.random.sample(x_sample[uniform_list,:].shape) * 3.5 - 1.75
                #x_sample[threemodal_list,:] =
                x['x'] = x_sample
        
        else: raise Exception("")
        
        return x, z, _z
Example #36
 def dlogpw_dw(self, v, w):
     r = self.f_dlogpw_dw(*ndict.orderedvals((v, w)))
     v, w = ndict.ordereddicts((v, w))
     return r[0], r[1], dict(zip(v.keys(), r[2:2 + len(v)])), dict(zip(w.keys(), r[2 + len(v):2 + len(v) + len(w)]))
Example #37
    def __init__(self, theano_warning='raise', hessian=True):

        theanofunction = lazytheanofunc('warn', mode='FAST_RUN')
        theanofunction_silent = lazytheanofunc('ignore', mode='FAST_RUN')

        # Create theano expressions
        w, x, z = ndict.ordereddicts(self.variables())
        self.var_w, self.var_x, self.var_z, = w, x, z

        # Helper variables
        A = T.dmatrix('A')
        self.var_A = A

        # Get gradient symbols
        self.allvars = list(w.values()) + list(x.values()) + list(
            z.values()) + [A]  # note: '+' concatenates lists
        self.allvars_keys = list(w.keys()) + list(x.keys()) + list(
            z.keys()) + ['A']

        if False:
            # Put test values
            # needs fully implemented gen_xz(), which is not always the case
            # Also, the FD has no test values
            theano.config.compute_test_value = 'raise'
            _w = self.init_w()
            for i in _w:
                w[i].tag.test_value = _w[i]
            _x, _z, _ = self.gen_xz(_w, {}, {}, 10)
            _x, _z = self.xz_to_theano(_x, _z)
            for i in _x:
                x[i].tag.test_value = _x[i]
            for i in _z:
                z[i].tag.test_value = _z[i]

        # TODO: more beautiful/standardized way of setting distributions
        # (should be even simpler than this)
        self.dist_px = {}
        self.dist_pz = {}

        logpw, logpx, logpz = self.factors(w, x, z, A)
        self.var_logpw, self.var_logpx, self.var_logpz = logpw, logpx, logpz

        # Complete-data likelihood estimate
        logpxz = logpx.sum() + logpz.sum()
        self.f_logpxz = theanofunction(self.allvars, [logpx, logpz])

        dlogpxz_dwz = T.grad(logpxz, list(w.values()) + list(z.values()))
        self.f_dlogpxz_dwz = theanofunction(self.allvars,
                                            [logpx, logpz] + dlogpxz_dwz)
        #self.f_dlogpxz_dw = theanofunction(allvars, [logpxz] + dlogpxz_dw)
        #self.f_dlogpxz_dz = theanofunction(allvars, [logpxz] + dlogpxz_dz)

        # prior
        dlogpw_dw = T.grad(logpw,
                           list(w.values()),
                           disconnected_inputs='ignore')
        self.f_logpw = theanofunction(list(w.values()), logpw)
        self.f_dlogpw_dw = theanofunction(list(w.values()),
                                          [logpw] + dlogpw_dw)

        if False:
            # MC-LIKELIHOOD
            logpx_max = logpx.max()
            logpxmc = T.log(T.exp(logpx - logpx_max).mean()) + logpx_max
            self.f_logpxmc = theanofunction(self.allvars, logpxmc)
            dlogpxmc_dw = T.grad(logpxmc,
                                 list(w.values()),
                                 disconnected_inputs=theano_warning)
            self.f_dlogpxmc_dw = theanofunction(self.allvars,
                                                [logpxmc] + dlogpxmc_dw)

        if True and len(z) > 0:
            # Fisher divergence (FD)
            gz = T.grad(logpxz, list(z.values()))
            gz2 = [T.dmatrix() for _ in gz]
            fd = 0
            for i in range(len(gz)):
                fd += T.sum((gz[i] - gz2[i])**2)
            dfd_dw = T.grad(fd, list(w.values()))
            self.f_dfd_dw = theanofunction(self.allvars + gz2,
                                           [logpx, logpz, fd] + dfd_dw)

        if False and hessian:
            # Hessian of logpxz wrt z (works best with n_batch=1)
            hessian_z = theano.gradient.hessian(logpxz, z_concat)
            self.f_hessian_z = theanofunction(self.allvars, hessian_z)
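Reading from the code, the Fisher-divergence block builds, for externally supplied gradient matrices gz2, the objective

    \mathrm{fd} = \sum_i \big\lVert \nabla_{z_i} \log p(x, z) - \mathrm{gz2}_i \big\rVert_2^2

and f_dfd_dw returns logpx, logpz, fd and the gradient of fd with respect to the weights w; this is the compiled function invoked by dfd_dw in Example #6.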
Example #38
 def __init__(self, theano_warning='raise', hessian=True):
     
     theanofunction = lazytheanofunc('warn', mode='FAST_RUN')
     theanofunction_silent = lazytheanofunc('ignore', mode='FAST_RUN')
     
     # Create theano expressions
     w, x, z = ndict.ordereddicts(self.variables())
     self.var_w, self.var_x, self.var_z, = w, x, z
     
     # Helper variables
     A = T.dmatrix('A')
     self.var_A = A
     
     # Get gradient symbols
     self.allvars = w.values()  + x.values() + z.values() + [A] # note: '+' concatenates lists
     self.allvars_keys = w.keys() + x.keys() + z.keys() + ['A']
     
     if False:
         # Put test values
         # needs fully implemented gen_xz(), which is not always the case
         # Also, the FD has no test values
         theano.config.compute_test_value = 'raise'
         _w = self.init_w()
         for i in _w: w[i].tag.test_value = _w[i]
         _x, _z, _ = self.gen_xz(_w, {}, {}, 10)
         _x, _z = self.xz_to_theano(_x, _z)
         for i in _x: x[i].tag.test_value = _x[i]
         for i in _z: z[i].tag.test_value = _z[i]
     
     # TODO: more beautiful/standardized way of setting distributions
      # (should be even simpler than this)
     self.dist_px = {}
     self.dist_pz = {}
     
     logpw, logpx, logpz = self.factors(w, x, z, A)
     self.var_logpw, self.var_logpx, self.var_logpz = logpw, logpx, logpz
     
     # Complete-data likelihood estimate
     logpxz = logpx.sum() + logpz.sum()
     self.f_logpxz = theanofunction(self.allvars, [logpx, logpz])
     
     dlogpxz_dwz = T.grad(logpxz, w.values() + z.values())
     self.f_dlogpxz_dwz = theanofunction(self.allvars, [logpx, logpz] + dlogpxz_dwz)
     #self.f_dlogpxz_dw = theanofunction(allvars, [logpxz] + dlogpxz_dw)
     #self.f_dlogpxz_dz = theanofunction(allvars, [logpxz] + dlogpxz_dz)
     
     # prior
     dlogpw_dw = T.grad(logpw, w.values(), disconnected_inputs='ignore')
     self.f_logpw = theanofunction(w.values(), logpw)
     self.f_dlogpw_dw = theanofunction(w.values(), [logpw] + dlogpw_dw)
     
     if False:
         # MC-LIKELIHOOD
         logpx_max = logpx.max()
         logpxmc = T.log(T.exp(logpx - logpx_max).mean()) + logpx_max
         self.f_logpxmc = theanofunction(self.allvars, logpxmc)
         dlogpxmc_dw = T.grad(logpxmc, w.values(), disconnected_inputs=theano_warning)
         self.f_dlogpxmc_dw = theanofunction(self.allvars, [logpxmc] + dlogpxmc_dw)
     
     if True and len(z) > 0:
         # Fisher divergence (FD)
         gz = T.grad(logpxz, z.values())
         gz2 = [T.dmatrix() for _ in gz]
         fd = 0
         for i in range(len(gz)):
             fd += T.sum((gz[i]-gz2[i])**2)
         dfd_dw = T.grad(fd, w.values())
         self.f_dfd_dw = theanofunction(self.allvars + gz2, [logpx, logpz, fd] + dfd_dw)
         
     if False and hessian:
         # Hessian of logpxz wrt z (works best with n_batch=1)
         hessian_z = theano.gradient.hessian(logpxz, z_concat)
         self.f_hessian_z = theanofunction(self.allvars, hessian_z)
Example #39
 def dlogpw_dw(self, v, w):
     r = self.f_dlogpw_dw(*ndict.orderedvals((v,w)))
     v, w = ndict.ordereddicts((v, w))
     return r[0], r[1], dict(zip(v.keys(), r[2:2+len(v)])), dict(zip(w.keys(), r[2+len(v):2+len(v)+len(w)]))