Example #1
    def filterbank_matrices(self, center_y, center_x, delta, sigma):
        """Create a Fy and a Fx
        
        Parameters
        ----------
        center_y : T.vector (shape: batch_size)
        center_x : T.vector (shape: batch_size)
            Y and X center coordinates for the attention window
        delta : T.vector (shape: batch_size)
        sigma : T.vector (shape: batch_size)
        
        Returns
        -------
            FY : T.tensor3 (shape: batch_size x N x img_height)
            FX : T.tensor3 (shape: batch_size x N x img_width)
        """
        tol = 1e-4
        N = self.N

        rng = T.arange(N, dtype=floatX)-N/2.+0.5  # e.g. for N=4: [-1.5, -0.5, 0.5, 1.5]

        muX = center_x.dimshuffle([0, 'x']) + delta.dimshuffle([0, 'x'])*rng
        muY = center_y.dimshuffle([0, 'x']) + delta.dimshuffle([0, 'x'])*rng

        a = tensor.arange(self.img_width, dtype=floatX)
        b = tensor.arange(self.img_height, dtype=floatX)
        
        FX = tensor.exp( -(a-muX.dimshuffle([0,1,'x']))**2 / 2. / sigma.dimshuffle([0,'x','x'])**2 )
        FY = tensor.exp( -(b-muY.dimshuffle([0,1,'x']))**2 / 2. / sigma.dimshuffle([0,'x','x'])**2 )
        FX = FX / (FX.sum(axis=-1).dimshuffle(0, 1, 'x') + tol)
        FY = FY / (FY.sum(axis=-1).dimshuffle(0, 1, 'x') + tol)

        return FY, FX
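A minimal usage sketch for the filterbank above, assuming a DRAW-style separable read; the read_glimpse helper and its images argument are illustrative additions, not part of the original snippet.

import theano.tensor as tensor

def read_glimpse(FY, FX, images):
    # Hedged sketch: images is (batch_size, img_height, img_width),
    # FY is (batch_size, N, img_height), FX is (batch_size, N, img_width).
    # glimpse[b] = FY[b] . images[b] . FX[b]^T  ->  (batch_size, N, N)
    return tensor.batched_dot(tensor.batched_dot(FY, images),
                              FX.dimshuffle(0, 2, 1))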
Example #2
def _elbo_t(logp, uw, inarray, n_mcsamples, random_seed):
    """Create Theano tensor of approximate ELBO by Monte Carlo sampling.
    """
    l = (uw.size / 2).astype('int64')
    u = uw[:l]
    w = uw[l:]

    # Callable tensor
    logp_ = lambda input: theano.clone(logp, {inarray: input}, strict=False)

    # Naive Monte-Carlo
    r = MRG_RandomStreams(seed=random_seed)

    if n_mcsamples == 1:
        n = r.normal(size=inarray.tag.test_value.shape)
        q = n * exp(w) + u
        elbo = logp_(q) + tt.sum(w) + 0.5 * l * (1 + np.log(2.0 * np.pi))
    else:
        n = r.normal(size=(n_mcsamples, u.tag.test_value.shape[0]))
        qs = n * exp(w) + u
        logps, _ = theano.scan(fn=lambda q: logp_(q),
                               outputs_info=None,
                               sequences=[qs])
        elbo = tt.mean(logps) + tt.sum(w) + 0.5 * l * (1 + np.log(2.0 * np.pi))

    return elbo
Example #3
def batchnorm(X, rescale=None, reshift=None, u=None, s=None, e=1e-8):
    """
    Batch normalization with support for omitting the rescale and reshift parameters,
    as well as for supplying inference-time statistics (u and s).
    Detects and uses the convolutional or fully connected version as appropriate.
    """
    g = rescale
    b = reshift
    if X.ndim == 4:
        if u is not None and s is not None:
            # use normalization params given a priori
            b_u = u.dimshuffle('x', 0, 'x', 'x')
            b_s = s.dimshuffle('x', 0, 'x', 'x')
        else:
            # compute normalization params from input
            b_u = T.mean(X, axis=[0, 2, 3]).dimshuffle('x', 0, 'x', 'x')
            b_s = T.mean(T.sqr(X - b_u), axis=[0, 2, 3]).dimshuffle('x', 0, 'x', 'x')
        # batch normalize
        X = (X - b_u) / T.sqrt(b_s + e)
        if g is not None and b is not None:
            # apply rescale and reshift
            X = X*T.exp(0.2*g.dimshuffle('x', 0, 'x', 'x')) + b.dimshuffle('x', 0, 'x', 'x')
    elif X.ndim == 2:
        if u is None and s is None:
            # compute normalization params from input
            u = T.mean(X, axis=0)
            s = T.mean(T.sqr(X - u), axis=0)
        # batch normalize
        X = (X - u) / T.sqrt(s + e)
        if g is not None and b is not None:
            # apply rescale and reshift
            X = X*T.exp(0.2*g) + b
    else:
        raise NotImplementedError
    return X
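A hedged usage sketch for the batchnorm() above: at training time the statistics are computed from the batch itself, while at inference previously estimated per-channel statistics would be supplied through u and s. The names X4, g, b, u_run and s_run are illustrative, not from the original code.

import numpy as np
import theano
import theano.tensor as T

X4 = T.tensor4('X')                                          # (batch, channels, rows, cols)
g = theano.shared(np.zeros(64, dtype='float32'), name='g')   # hypothetical rescale parameter
b = theano.shared(np.zeros(64, dtype='float32'), name='b')   # hypothetical reshift parameter
u_run = T.vector('u_run')                                    # hypothetical running mean
s_run = T.vector('s_run')                                    # hypothetical running variance

h_train = batchnorm(X4, rescale=g, reshift=b)                   # statistics computed from the batch
h_test = batchnorm(X4, rescale=g, reshift=b, u=u_run, s=s_run)  # statistics supplied a priori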
Example #4
 def K(self, x, y):
     l = tensor.exp(self.log_lenscale)
     d = ((x ** 2).sum(axis=1).dimshuffle(0, 'x')
             + (y ** 2).sum(axis=1)
             - 2 * tensor.dot(x, y.T))
     K = tensor.exp(-tensor.sqrt(d) / l)
     return K
Example #5
	def learn_step(self):
		
		#this is a list of gradients w.r.t. every parameter in self.params
		gparams=T.grad(self.loss, self.params)
		
		updates=OrderedDict()
		#updates the momentums and parameter values
		i=0
		for param, gparam, momentum, lrate, momentum_coeff in zip(self.params, gparams, self.momentums, self.lrates, self.momentum_coeffs):
			
			#if param.ndim==2:
			#	gparam=T.dot(T.dot(param,param.T),gparam)
			
			if param.name=='log_stddev':
				gparam=gparam*2.0*T.exp(2.0*param)
			
			if param.name=='M':
				gparam=gparam*T.exp(1.0*self.params[i+2]).dimshuffle('x',0)
			
			if param.name=='b':
				gparam=gparam*T.exp(1.0*self.params[i+1])
			
			new_momentum=momentum_coeff*momentum - lrate*gparam*self.global_lrate
			new_param=param + new_momentum
			
			updates[param]=new_param
			updates[momentum]=new_momentum
			i+=1
		
		updates[self.global_lrate]=self.global_lrate*self.lrate_decay
		
		return updates
Example #6
	def __init__(self,
				 word_vec_width,
				 batch_size,
				 num_hidden,
				 learning_rate=0.1):
		self.num_hidden = num_hidden
		self.learning_rate = learning_rate
		self.word_vec_width = word_vec_width
		self.batch_size = batch_size

		self.vocab_mat = T.fmatrix('vocab')
		self.word_onehot = T.fmatrix('word_onehot')
		b = T.fvector('b')
		W = T.fmatrix('W')
		f = 1 / (1 + T.exp(-(W * (self.word_onehot.dot(self.vocab_mat) + b))))
		s = T.sum(f)

		self.exec_fn = theano.function(
			[self.word_onehot, b, W, self.vocab_mat],
			f,
			allow_input_downcast=True)

		self.word_onehot_c = T.fmatrix('word_onehot_c')
		f_c = 1 / (1 + T.exp(-(W * (self.word_onehot_c.dot(self.vocab_mat)) + b)))
		s_c = T.sum(f_c)

		J = T.largest(0, 1 - s + s_c)
		self.grad = theano.grad(J, [b, W, self.vocab_mat])

		self.grad_fn = theano.function(
			[self.word_onehot, self.word_onehot_c, b, W, self.vocab_mat],
			self.grad,
			allow_input_downcast=True)
Example #7
 def __init__(self, alpha, beta, *args, **kwargs):
     super(Weibull, self).__init__(*args, **kwargs)
     self.alpha = alpha
     self.beta = beta
     # standard Weibull moments: mean = beta*Gamma(1 + 1/alpha), median = beta*ln(2)**(1/alpha),
     # variance = beta**2 * Gamma(1 + 2/alpha) - mean**2
     self.mean = beta * T.exp(gammaln(1 + 1./alpha))
     self.median = beta * T.log(2)**(1./alpha)
     self.variance = (beta**2) * T.exp(gammaln(1 + 2./alpha)) - self.mean**2
Example #8
    def forward_init(self):
        obs_ = self.obs_.reshape([self.obs_.shape[0]*self.obs_.shape[1], self.obs_.shape[-1]])

        h = eval(self.activ)(tensor.dot(obs_, self.params['W']) + self.params['b'][None,None,:])

        self.pi = []
        for oi in xrange(self.n_out):
            pi = tensor.dot(h, self.params['U%d'%oi]) + self.params['c%d'%oi][None,:]
            pi = tensor.exp(pi - tensor.max(pi,-1,keepdims=True))
            self.pi.append(pi / (pi.sum(-1, keepdims=True)))

        prev = tensor.matrix('prev', dtype='float32')
        #obs = tensor.matrix('obs', dtype='float32')
        obs_ = self.obs_.reshape([self.obs_.shape[0]*self.obs_.shape[1], 
                                  self.obs_.shape[-1]])
        obs_ = obs_[0]

        self.h_init = lambda x: numpy.float32(0.)

        h = eval(self.activ)(tensor.dot(obs_, self.params['W']) + self.params['b'][None,:])

        pi = []
        for oi in xrange(self.n_out):
            pi_ = tensor.dot(h, self.params['U%d'%oi]) + self.params['c%d'%oi][None,:]
            pi_ = tensor.exp(pi_ - tensor.max(pi_,-1,keepdims=True))
            pi.append(pi_ / (pi_.sum(-1, keepdims=True)))

        self.forward = theano.function([self.obs, prev], [h] + pi, name='forward', on_unused_input='ignore')
Example #9
def filterbank_matrices(center_y, center_x, delta, sigma, N, imgshp):
    """Create a Fy and a Fx

    Parameters
    ----------
    center_y : T.vector (shape: batch_size)
    center_x : T.vector (shape: batch_size)
        Y and X center coordinates for the attention window
    delta : T.vector (shape: batch_size)
    sigma : T.vector (shape: batch_size)

    Returns
    -------
        FY, FX
    """
    tol = 1e-4
    img_height, img_width = imgshp
    muX = center_x.dimshuffle([0, 'x']) + delta.dimshuffle([0, 'x'])*(T.arange(N)-N/2-0.5)
    muY = center_y.dimshuffle([0, 'x']) + delta.dimshuffle([0, 'x'])*(T.arange(N)-N/2-0.5)

    a = T.arange(img_width)
    b = T.arange(img_height)

    FX = T.exp( -(a-muX.dimshuffle([0,1,'x']))**2 / 2. / sigma.dimshuffle([0,'x','x'])**2 )
    FY = T.exp( -(b-muY.dimshuffle([0,1,'x']))**2 / 2. / sigma.dimshuffle([0,'x','x'])**2 )
    FX = FX / (FX.sum(axis=-1).dimshuffle(0, 1, 'x') + tol)
    FY = FY / (FY.sum(axis=-1).dimshuffle(0, 1, 'x') + tol)

    return FY, FX
Example #10
    def lp_norm(self, n, k, r, c, z):
        '''
        Lp pooling: Lp = ( 1/n * sum(|x_i|^p, 1..n) )^(1/p), where p = 1 + ln(1 + e^P)
        :param n: batch index
        :param k: feature-map (channel) index
        :param r: row index of the pooled output
        :param c: column index of the pooled output
        :param z: output tensor to update
        :return: z with the pooled value written at [n, k, r, c]
        '''
        ds0, ds1 = self.pool_size
        st0, st1 = self.stride
        pad_h = self.pad[0]
        pad_w = self.pad[1]

        row_st = r * st0
        row_end = T.minimum(row_st + ds0, self.img_rows)
        row_st = T.maximum(row_st, self.pad[0])
        row_end = T.minimum(row_end, self.x_m2d + pad_h)

        col_st = c * st1
        col_end = T.minimum(col_st + ds1, self.img_cols)
        col_st = T.maximum(col_st, self.pad[1])
        col_end = T.minimum(col_end, self.x_m1d + pad_w)

        Lp = T.pow(
                T.mean(T.pow(
                        T.abs_(T.flatten(self.y[n, k, row_st:row_end, col_st:col_end], 1)),
                        1 + T.log(1 + T.exp(self.P))
                )),
                1 / (1 + T.log(1 + T.exp(self.P)))
        )

        return T.set_subtensor(z[n, k, r, c], Lp)
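A small NumPy illustration of the Lp formula stated in the docstring above; the window values and P are hypothetical.

import numpy as np

P = 0.0
p = 1.0 + np.log(1.0 + np.exp(P))                # p = 1 + ln(1 + e^P) >= 1
window = np.array([0.2, -0.5, 1.0, 0.3])         # one pooling window
lp = np.mean(np.abs(window) ** p) ** (1.0 / p)   # (1/n * sum(|x_i|^p, 1..n))^(1/p)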
Example #11
    def output_probabilistic(self, m_w_previous, v_w_previous):
        if (self.non_linear):
            m_in = self.m_w - m_w_previous
            v_in = self.v_w
            # We compute the mean and variance after the ReLU activation
            lam = self.lam
            v_1 = 1 + 2*lam*v_in
            v_1_inv = v_1**-1

            s_1 = T.prod(v_1,axis=1)**-0.5
            v_2 = 1 + 4*lam*v_in
            v_2_inv = v_2**-1
            s_2 = T.prod(v_2,axis=1)**-0.5
            v_inv = v_in**-1
            exponent1 = m_in**2*(1 - v_1_inv)*v_inv
            exponent1 = T.sum(exponent1,axis=1)
            exponent2 = m_in**2*(1 - v_2_inv)*v_inv
            exponent2 = T.sum(exponent2,axis=1)
            m_a = s_1*T.exp(-0.5*exponent1)
            v_a = s_2*T.exp(-0.5*exponent2) - m_a**2

            return (m_a, v_a)

        else:
            m_w_previous_with_bias = \
            T.concatenate([ m_w_previous, T.alloc(1, 1) ], 0)
            v_w_previous_with_bias = \
            T.concatenate([ v_w_previous, T.alloc(0, 1) ], 0)

            m_linear = T.dot(self.m_w, m_w_previous_with_bias) / T.sqrt(self.n_inputs)
            v_linear = (T.dot(self.v_w, v_w_previous_with_bias) + \
                T.dot(self.m_w**2, v_w_previous_with_bias) + \
                T.dot(self.v_w, m_w_previous_with_bias**2)) / self.n_inputs
            return (m_linear, v_linear)
Example #12
def flow(init_W,init_b,nData):
    import theano
    import theano.tensor as T

    n_layers = len(init_b)

    bias = []
    weights = []
    muStates = []
    for layer_i in xrange(n_layers):
        bias.append(theano.shared(value=init_b[layer_i],
                                    name='b'+str(layer_i),
                                    borrow=True))
        weights.append(theano.shared(value=init_W[layer_i],
                                    name='W'+str(layer_i),
                                    borrow=True))
        muStates.append(T.matrix('mu'+str(layer_i)))

    flows = 0.
    for layer_i in xrange(n_layers):
        diffe = T.tile(bias[layer_i].copy(), (nData,1))
        # All layers except top
        if layer_i < (n_layers-1):
            W_h = weights[layer_i].dot(muStates[layer_i+1].T).T
            diffe += W_h

        if layer_i > 0:
            vT_W = muStates[layer_i-1].dot(weights[layer_i-1])
            diffe += vT_W

        exK = muStates[layer_i]*T.exp(.5*-diffe) + (1.-muStates[layer_i])*T.exp(.5*diffe)
        flows += exK.sum()
    return flows
Example #13
    def step(xinp_h1_t, xgate_h1_t,
             xinp_h2_t, xgate_h2_t,
             h1_tm1, h2_tm1, k_tm1, w_tm1, ctx):
        attinp_h1, attgate_h1 = att_to_h1.proj(w_tm1)

        h1_t = cell1.step(xinp_h1_t + attinp_h1, xgate_h1_t + attgate_h1,
                          h1_tm1)
        h1inp_h2, h1gate_h2 = h1_to_h2.proj(h1_t)

        a_t = h1_t.dot(h1_to_att_a)
        b_t = h1_t.dot(h1_to_att_b)
        k_t = h1_t.dot(h1_to_att_k)

        a_t = tensor.exp(a_t)
        b_t = tensor.exp(b_t)
        k_t = k_tm1 + tensor.exp(k_t)

        ss4 = calc_phi(k_t, a_t, b_t, u)
        ss5 = ss4.dimshuffle(0, 1, 'x')
        ss6 = ss5 * ctx.dimshuffle(1, 0, 2)
        w_t = ss6.sum(axis=1)

        attinp_h2, attgate_h2 = att_to_h2.proj(w_t)
        h2_t = cell2.step(xinp_h2_t + h1inp_h2 + attinp_h2,
                          xgate_h2_t + h1gate_h2 + attgate_h2, h2_tm1)
        return h1_t, h2_t, k_t, w_t
Example #14
	def _step(self,xg_t, xo_t, xc_t, mask_tm1,h_tm1, c_tm1, u_g, u_o, u_c):

		h_mask_tm1 = mask_tm1 * h_tm1
		c_mask_tm1 = mask_tm1 * c_tm1
		act = T.tensordot( xg_t + h_mask_tm1, u_g , [[1],[2]])
		gate = T.nnet.softmax(act.reshape((-1, act.shape[-1]))).reshape(act.shape)

		c_tilda = self.activation(xc_t + T.dot(h_mask_tm1, u_c))

		sigma_se = self.k_parameters[0]
		sigma_per = self.k_parameters[1]
		sigma_b_lin = self.k_parameters[2]
		sigma_v_lin = self.k_parameters[3]
		sigma_rq = self.k_parameters[4]

		l_se = self.k_parameters[5]
		l_per = self.k_parameters[6]
		l_lin = self.k_parameters[7]
		l_rq = self.k_parameters[8]

		alpha_rq = self.k_parameters[9]
		p_per = self.k_parameters[10]

		k_se = T.pow(sigma_se,2) * T.exp( -T.pow(c_mask_tm1 - c_tilda,2) / (2* T.pow(l_se,2) + self.EPS))
		k_per = T.pow(sigma_per,2) * T.exp( -2*T.pow(T.sin( math.pi*(c_mask_tm1 - c_tilda)/ (p_per + self.EPS) ),2)	 / ( T.pow(l_per,2) + self.EPS ))
		k_lin = T.pow(sigma_b_lin,2) + T.pow(sigma_v_lin,2)	 * (c_mask_tm1 - l_lin) * (c_tilda - l_lin )
		k_rq = T.pow(sigma_rq,2) * T.pow( 1 + T.pow( (c_mask_tm1 - c_tilda),2)	/ ( 2 * alpha_rq * T.pow(l_rq,2) + self.EPS), -alpha_rq)

		ops = [c_mask_tm1,c_tilda,k_se, k_per, k_lin,k_rq]
		yshuff = T.as_tensor_variable( ops, name='yshuff').dimshuffle(1,2,0)
		c_t = (gate.reshape((-1,gate.shape[-1])) * yshuff.reshape((-1,yshuff.shape[-1]))).sum(axis = 1).reshape(gate.shape[:2])
		o_t = self.inner_activation(xo_t + T.dot(h_mask_tm1, u_o))
		h_t = o_t * self.activation(c_t)
		return h_t, c_t
Example #15
    def get_gradients(self, X, Y, weights=1.0):
        W_mean, W_ls, b_mean, b_ls = self.parameters

        mean, log_sigma = self.sample_expected(Y)
        sigma = tensor.exp(log_sigma)

        cost = -log_sigma - 0.5 * (X - mean) ** 2 / tensor.exp(2 * log_sigma)
        if weights != 1.0:
            cost = -weights.dimshuffle(0, "x") * cost

        cost_scaled = sigma ** 2 * cost
        cost_gscale = (sigma ** 2).sum(axis=1).dimshuffle([0, "x"])
        cost_gscale = cost_gscale * cost

        gradients = OrderedDict()

        params = Selector(self.mlp).get_parameters()
        for pname, param in params.iteritems():
            gradients[param] = tensor.grad(cost_gscale.sum(), param, consider_constant=[X, Y])

        gradients[W_mean] = tensor.grad(cost_scaled.sum(), W_mean, consider_constant=[X, Y])
        gradients[b_mean] = tensor.grad(cost_scaled.sum(), b_mean, consider_constant=[X, Y])

        gradients[W_ls] = tensor.grad(cost_scaled.sum(), W_ls, consider_constant=[X, Y])
        gradients[b_ls] = tensor.grad(cost_scaled.sum(), b_ls, consider_constant=[X, Y])

        return gradients
Example #16
def decoder(localt, stm1, cstm1, hmat,
            Wbeta, Ubeta, vbeta,
            Wzide, Wzfde, Wzcde, Wzode,
            Ede, Wxide, Wside, bide, Wxfde, Wsfde, bfde, 
            Wxcde, Wscde, bcde, Wxode, Wsode, bode,
            L0, Ls, Lz):
    xt = theano.dot(localt, Ede)
    # get z from hmat (sentlen * nen), stm1
    beta = theano.dot(act(theano.dot(hmat, Ubeta) + theano.dot(stm1, Wbeta)), vbeta)
    alpha = T.exp(beta-T.max(beta)) / T.sum(T.exp(beta-T.max(beta)) )
    zt = theano.dot(alpha, hmat)
    #
    it = sigma(theano.dot(xt,Wxide) + theano.dot(stm1,Wside) + theano.dot(zt,Wzide) + bide )
    ft = sigma(theano.dot(xt,Wxfde) + theano.dot(stm1,Wsfde) + theano.dot(zt,Wzfde) + bfde )
    cst = ft * cstm1 + it*act(theano.dot(xt,Wxcde)+theano.dot(stm1,Wscde)+ theano.dot(zt,Wzcde) +bcde )
    ot = sigma(theano.dot(xt,Wxode) + theano.dot(stm1,Wsode) + theano.dot(zt,Wzode) +bode )
    st = ot * act(cst)
    #
    winst = getwins()
    stfory = st * winst
    #
    yt0 = T.dot( (xt + T.dot(stfory, Ls) + T.dot(zt, Lz) ) , L0)
    #yt0 = theano.dot(st,Wsyde)
    yt0max = T.max(yt0)
    #yt0maxvec = T.maximum(yt0, yt0max)
    yt = T.exp(yt0-yt0max) / T.sum(T.exp(yt0-yt0max))
    logyt = yt0-yt0max-T.log(T.sum(T.exp(yt0-yt0max)))
    #yt = T.exp(yt0-yt0maxvec) / T.sum(T.exp(yt0-yt0maxvec))
    #logyt = yt0-yt0maxvec-T.log(T.sum(T.exp(yt0-yt0maxvec)))
#    yt = T.concatenate([addzero,tempyt],axis=0)
    return st, cst, yt, logyt
Example #17
 def nn2att(self, l):
     """Convert neural-net outputs to attention parameters
 
     Parameters
     ----------
     l : tensor (batch_size x 5)
 
     Returns
     -------
     center_y : vector (batch_size)
     center_x : vector (batch_size)
     delta : vector (batch_size)
     sigma : vector (batch_size)
     gamma : vector (batch_size)
     """
     center_y  = l[:,0]
     center_x  = l[:,1]
     log_delta = l[:,2]
     log_sigma = l[:,3]
     log_gamma = l[:,4]
 
     delta = T.exp(log_delta)
     sigma = T.exp(log_sigma/2.)
     gamma = T.exp(log_gamma).dimshuffle(0, 'x')
 
     # normalize coordinates
     center_x = (center_x+1.)/2. * self.img_width
     center_y = (center_y+1.)/2. * self.img_height
     delta = (max(self.img_width, self.img_height)-1) / (self.N-1) * delta
 
     return center_y, center_x, delta, sigma, gamma
Example #18
def test_elemwise1():
    """ Several kinds of elemwise expressions with no broadcasting,
    non power-of-two shape """

    shape = (3, 4)
    a = tcn.shared_constructor(theano._asarray(numpy.random.rand(*shape),
                                               dtype='float32') + 0.5, 'a')
    b = tensor.fmatrix()

    #let debugmode catch any mistakes
    print >> sys.stdout, "STARTING FUNCTION 1"
    f = pfunc([b], [], updates=[(a, b ** a)], mode=mode_with_gpu)
    for i, node in enumerate(f.maker.env.toposort()):
        print i, node
    f(theano._asarray(numpy.random.rand(*shape), dtype='float32') + 0.3)

    print >> sys.stdout, "STARTING FUNCTION 2"
    #let debugmode catch any mistakes
    f = pfunc([b], [], updates=[(a, tensor.exp(b ** a))], mode=mode_with_gpu)
    for i, node in enumerate(f.maker.env.toposort()):
        print i, node
    f(theano._asarray(numpy.random.rand(*shape), dtype='float32') + 0.3)

    print >> sys.stdout, "STARTING FUNCTION 3"
    #let debugmode catch any mistakes
    f = pfunc([b], [], updates=[(a, a + b * tensor.exp(b ** a))],
              mode=mode_with_gpu)
    f(theano._asarray(numpy.random.rand(*shape), dtype='float32') + 0.3)
Example #19
def softmax(x):
    if x.ndim == 2:
        e = TT.exp(x)
        return e / TT.sum(e, axis=1).dimshuffle(0, 'x')
    else:
        e = TT.exp(x)
        return e/ TT.sum(e)
Example #20
    def _step(self,h_tm1,p_x,p_xm,ctx):
        #visual attention
    
        #ctx=dropout_layer(ctx)
        v_a=T.exp(ctx+T.dot(h_tm1,self.W_v))
        v_a=v_a/v_a.sum(1, keepdims=True) 
        
        ctx_p=ctx*v_a
    
        #linguistic attention
        l_a=p_x+T.dot(h_tm1,self.W_l)[None,:,:]

        l_a=T.dot(l_a,self.U_att)+self.b_att        

        l_a=T.exp(l_a.reshape((l_a.shape[0],l_a.shape[1])))
        
        l_a=l_a/l_a.sum(0, keepdims=True) 
        
        l_a=l_a*p_xm
        
        p_x_p=(p_x*l_a[:,:,None]).sum(0)
        
        h= T.dot(ctx_p,self.W_vh) + T.dot(p_x_p,self.W_lh)

        return h
Example #21
    def _step(self, x_tm1, u_tm1, inputs, x_prior, u_prior, *args):
        # x_prior are previous states
        # u_prior are causes from above
        outputs = self.activation(T.dot(x_tm1, self.W))
        rec_error = T.sqr(inputs - outputs).sum()
        causes = (1 + T.exp(-T.dot(u_tm1, self.V))) * .5

        if self.pool_flag:
            batch_size = inputs.shape[0]
            dim = causes.shape[1]
            imgs = T.cast(T.sqrt(dim), 'int64')
            causes_up = causes.reshape(
                (batch_size, 1, imgs, imgs)).repeat(
                    self.pool_size, axis=2).repeat(self.pool_size,
                                                   axis=3).flatten(ndim=2)
        else:
            causes_up = causes

        x = _IstaStep(rec_error, x_tm1, lambdav=self.gamma*causes_up,
                      x_prior=x_prior)

        if self.pool_flag:
            dim = T.cast(T.sqrt(x.shape[1]), 'int64')
            x_pool = x.reshape((batch_size, 1, dim, dim))
            x_pool = max_pool_2d(x_pool, ds=(self.pool_size, )*2).flatten(ndim=2)
        else:
            x_pool = x

        prev_u_cost = .01 * self.gamma * T.sqr(u_tm1-u_prior).sum()
        u_cost = causes * abs(x_pool) * self.gamma + prev_u_cost
        u = _IstaStep(u_cost.sum(), u_tm1, lambdav=self.gamma)
        causes = (1 + T.exp(-T.dot(u, self.V))) * .5
        u_cost = causes * abs(x_pool) * self.gamma

        return (x, u, u_cost, outputs)
Example #22
    def createGradientFunctions(self):
        #create
        X = T.dmatrices("X")
        mu, logSigma, u, v, f, R = T.dcols("mu", "logSigma", "u", "v", "f", "R")
        mu = sharedX( np.random.normal(10, 10, (self.dimTheta, 1)), name='mu') 
        logSigma = sharedX(np.random.uniform(0, 4, (self.dimTheta, 1)), name='logSigma')
        logLambd = sharedX(np.matrix(np.random.uniform(0, 10)),name='logLambd')
        logLambd = T.patternbroadcast(T.dmatrix("logLambd"),[1,1])
        negKL = 0.5 * T.sum(1 + 2*logSigma - mu ** 2 - T.exp(logSigma) ** 2)
        theta = mu+T.exp(logSigma)*v
        W=theta
        y=X[:,0]
        X_sim=X[:,1:]
        f = (T.dot(X_sim,W)+u).flatten()
        
        gradvariables = [mu, logSigma, logLambd]
        
        
        logLike = T.sum(-(0.5 * np.log(2 * np.pi) + logLambd) - 0.5 * ((y-f)/(T.exp(logLambd)))**2)

        logp = (negKL + logLike)/self.m

        optimizer = -logp
        
        self.negKL = th.function([mu, logSigma], negKL, on_unused_input='ignore')
        self.f = th.function(gradvariables + [X,u,v], f, on_unused_input='ignore')
        self.logLike = th.function(gradvariables + [X, u, v], logLike,on_unused_input='ignore')
        derivatives = T.grad(logp,gradvariables)
        derivatives.append(logp)

        self.gradientfunction = th.function(gradvariables + [X, u, v], derivatives, on_unused_input='ignore')
        self.lowerboundfunction = th.function(gradvariables + [X, u, v], logp, on_unused_input='ignore')

        self.optimizer = BatchGradientDescent(objective=optimizer, params=gradvariables,inputs = [X,u,v],conjugate=True,max_iter=1)
Example #23
def bbox_transform_inv(boxes, deltas):
    if boxes.shape[0] == 0:
        return T.zeros((0, deltas.shape[1]), dtype=deltas.dtype)

    boxes = boxes.astype(deltas.dtype)

    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights

    dx = deltas[:, 0::4]
    dy = deltas[:, 1::4]
    dw = deltas[:, 2::4]
    dh = deltas[:, 3::4]

    pred_ctr_x = dx * widths.dimshuffle(0,'x') + ctr_x.dimshuffle(0,'x')
    pred_ctr_y = dy * heights.dimshuffle(0,'x') + ctr_y.dimshuffle(0,'x')
    pred_w = T.exp(dw) * widths.dimshuffle(0,'x')
    pred_h = T.exp(dh) * heights.dimshuffle(0,'x')

    pred_boxes = T.zeros_like(deltas, dtype=deltas.dtype)
    # x1
    pred_boxes = T.set_subtensor(pred_boxes[:, 0::4], pred_ctr_x - 0.5 * pred_w)
    # y1
    pred_boxes = T.set_subtensor(pred_boxes[:, 1::4], pred_ctr_y - 0.5 * pred_h)
    # x2
    pred_boxes = T.set_subtensor(pred_boxes[:, 2::4], pred_ctr_x + 0.5 * pred_w)
    # y2
    pred_boxes = T.set_subtensor(pred_boxes[:, 3::4], pred_ctr_y + 0.5 * pred_h)

    return pred_boxes
Example #24
        def model(x, p, p_dropout, noise):
            input_size = x.shape[1]

            h0 = p.W_emb[x]  # (seq_len, batch_size, emb_size)
            h0 = dropout(h0, p_dropout)

            cost, h1, c1, h2, c2 = [0., b1_h, b1_c, b2_h, b2_c]
            eps = srnd.normal((self.hp.seq_size, input_size, self.n_zpt), dtype=theano.config.floatX)
            
            for t in xrange(0, self.hp.seq_size):
                if t >= self.hp.warmup_size:
                    pyx = softmax(T.dot(h2, T.transpose(p.W_emb)))
                    cost += T.sum(T.nnet.categorical_crossentropy(pyx, theano_one_hot(x[t], n_tokens)))

                h_x = concatenate([h0[t], h2], axis=1)
                h1, c1 = lstm(h_x, h1, c1, p.W1, p.V1, p.b1)
                h1 = dropout(h1, p_dropout)

                mu_encoder = T.dot(h1, p.Wmu) + p.bmu
                if noise:
                    log_sigma_encoder = 0.5*(T.dot(h1, p.Wsi) + p.bsi) 
                    cost += -0.5* T.sum(1 + 2*log_sigma_encoder - mu_encoder**2 - T.exp(2*log_sigma_encoder)) * 0.01
                    z = mu_encoder + eps[t]*T.exp(log_sigma_encoder)
                else:
                    z = mu_encoder

                h2, c2 = lstm(z, h2, c2, p.W2, p.V2, p.b2)
                h2 = dropout(h2, p_dropout)

            h_updates = [(b1_h, h1), (b1_c, c1), (b2_h, h2), (b2_c, c2)]
            return cost, h_updates
Example #25
def softmax_ratio(numer, denom):
    """
    .. todo::

        WRITEME properly

    Parameters
    ----------
    numer : Variable
        Output of a softmax.
    denom : Variable
        Output of a softmax.

    Returns
    -------
    ratio : Variable
        numer / denom, computed in a numerically stable way
    """

    numer_Z = arg_of_softmax(numer)
    denom_Z = arg_of_softmax(denom)
    numer_Z -= numer_Z.max(axis=1).dimshuffle(0, 'x')
    denom_Z -= denom_Z.min(axis=1).dimshuffle(0, 'x')

    new_num = T.exp(numer_Z - denom_Z) * (T.exp(denom_Z).sum(
        axis=1).dimshuffle(0, 'x'))
    new_den = (T.exp(numer_Z).sum(axis=1).dimshuffle(0, 'x'))

    return new_num / new_den
Example #26
    def cost(self, Y, Y_hat):
        """
        Y must be one-hot binary. Y_hat is a softmax estimate.
        of Y. Returns negative log probability of Y under the Y_hat
        distribution.
        """
        y_probclass, y_probcluster = Y_hat
        #Y = self._group_dot.fprop(Y, Y_hat)
        
        CLS = self.array_clusters[T.cast(T.argmax(Y,axis=1),'int32')]
        #theano.printing.Print('value of cls')(CLS)
        assert hasattr(y_probclass, 'owner')
        owner = y_probclass.owner
        assert owner is not None
        op = owner.op
        if isinstance(op, Print):
            assert len(owner.inputs) == 1
            y_probclass, = owner.inputs
            owner = y_probclass.owner
            op = owner.op
        assert isinstance(op, T.nnet.Softmax)

        z_class ,= owner.inputs
        assert z_class.ndim == 2

        assert hasattr(y_probcluster, 'owner')
        owner = y_probcluster.owner
        assert owner is not None
        op = owner.op
        if isinstance(op, Print):
            assert len(owner.inputs) == 1
            y_probcluster, = owner.inputs
            owner = y_probcluster.owner
            op = owner.op
        assert isinstance(op, T.nnet.Softmax)
        z_cluster ,= owner.inputs
        assert z_cluster.ndim == 2

        z_class = z_class - z_class.max(axis=1).dimshuffle(0, 'x')
        log_prob = z_class - T.log(T.exp(z_class).sum(axis=1).dimshuffle(0, 'x'))
        # we use sum and not mean because this is really one variable per row
        # Y = OneHotFormatter(self.n_classes).theano_expr(
        #                         T.addbroadcast(Y,0,1).dimshuffle(0).astype('uint32'))
        log_prob_of = (Y * log_prob).sum(axis=1)
        assert log_prob_of.ndim == 1

        # cluster
        z_cluster = z_cluster - z_cluster.max(axis=1).dimshuffle(0, 'x')
        log_prob_cls = z_cluster - T.log(T.exp(z_cluster).sum(axis=1).dimshuffle(0, 'x'))

        out = OneHotFormatter(self.n_clusters).theano_expr(CLS.astype('int32'))
        #CLS = OneHotFormatter(self.n_clusters).theano_expr(
         #                        T.addbroadcast(CLS, 1).dimshuffle(0).astype('uint32'))
        log_prob_of_cls = (out * log_prob_cls).sum(axis=1)
        assert log_prob_of_cls.ndim == 1

        # p(w|history) = p(c|s) * p(w|c,s)
        log_prob_of = log_prob_of + log_prob_of_cls
        rval = log_prob_of.mean()        
        return - rval
Example #27
	def initialise(self):
		rng = np.random.RandomState(23455)
		inpt = self.inpt
		w_shp = (self.in_dim,self.out_dim)
		w_bound = np.sqrt(self.out_dim)
		W_mu = theano.shared(np.asarray(rng.normal(0., 0.01, size=w_shp), dtype=inpt.dtype), name='w_post_mu')

		b_shp = (self.out_dim,)
		b_mu = theano.shared(np.asarray(np.zeros(self.out_dim), dtype=inpt.dtype), name='b_post_mu')

		W_sigma = theano.shared(np.asarray(rng.normal(0., 0.01, size=w_shp), dtype=inpt.dtype), name='w_post_sigm')

		b_sigma = theano.shared(np.asarray(np.zeros(self.out_dim), dtype=inpt.dtype), name='b_post_sigm')

		# Find the hidden variable z
		self.mu_encoder = T.dot(self.inpt,W_mu) +b_mu
		self.log_sigma_encoder =0.5*(T.dot(self.inpt,W_sigma) + b_sigma)
		self.output =self.mu_encoder +T.exp(self.log_sigma_encoder)*self.eps.astype(theano.config.floatX)
		self.prior =  0.5* T.sum(1 + 2*self.log_sigma_encoder - self.mu_encoder**2 - T.exp(2*self.log_sigma_encoder),axis=1).astype(theano.config.floatX)
		self.params = [W_mu,b_mu,W_sigma,b_sigma]
Example #28
 def softmax_neg(self, X):
     if hasattr(self, 'hack_matrix'):
         X = X * self.hack_matrix
         e_x = T.exp(X - X.max(axis=1).dimshuffle(0, 'x')) * self.hack_matrix
     else:
         e_x = T.fill_diagonal(T.exp(X - X.max(axis=1).dimshuffle(0, 'x')), 0)
     return e_x / e_x.sum(axis=1).dimshuffle(0, 'x')
Example #29
def entropy_exp(X, g=None, b=None, u=None, s=None, a=1., e=1e-8):
    if X.ndim == 4:
        if u is not None and s is not None:
            b_u = u.dimshuffle('x', 0, 'x', 'x')
            b_s = s.dimshuffle('x', 0, 'x', 'x')
        else:
            b_u = T.mean(X, axis=[0, 2, 3]).dimshuffle('x', 0, 'x', 'x')
            b_s = T.mean(T.sqr(X - b_u), axis=[0, 2, 3]).dimshuffle('x', 0, 'x', 'x')
        if a != 1:
            b_u = (1. - a)*0. + a*b_u
            b_s = (1. - a)*1. + a*b_s
        X = (X - b_u) / T.sqrt(b_s + e)
        if g is not None and b is not None:
            X = X*T.exp(g.dimshuffle('x', 0, 'x', 'x'))+b.dimshuffle('x', 0, 'x', 'x')
    elif X.ndim == 2:
        if u is None and s is None:
            u = T.mean(X, axis=0)
            s = T.mean(T.sqr(X - u), axis=0)
        if a != 1:
            u = (1. - a)*0. + a*u
            s = (1. - a)*1. + a*s
        X = (X - u) / T.sqrt(s + e)
        if g is not None and b is not None:
            X = X*T.exp(g)+b
    else:
        raise NotImplementedError
    return X
Example #30
def compute_f_mu(x, t, params):
	[centers, spreads, biases, M, b]=params
	diffs=x.dimshuffle(0,1,2,'x')-centers.dimshuffle('x','x',0,1)
	scaled_diffs=(diffs**2)*T.exp(spreads).dimshuffle('x','x',0,1)
	exp_terms=T.sum(scaled_diffs,axis=2)+biases.dimshuffle('x','x',0)*0.0
	h=T.exp(-exp_terms)
	sumact=T.sum(h,axis=2)
	#Normalization
	hnorm=h/sumact.dimshuffle(0,1,'x')
	z=T.dot(hnorm,M)
	z=T.reshape(z,(t.shape[0],t.shape[1],ntgates,nx))+b.dimshuffle('x','x',0,1) #nt by nb by ntgates by nx
	#z=z+T.reshape(x,(t.shape[0],t.shape[1],1,nx))
	
	tpoints=T.cast(T.arange(ntgates),'float32')/T.cast(ntgates-1,'float32')
	tpoints=T.reshape(tpoints, (1,1,ntgates))
	#tgating=T.exp(T.dot(t,muWT)+mubT) #nt by nb by ntgates
	tgating=T.exp(-kT*(tpoints-t)**2)
	tgating=tgating/T.reshape(T.sum(tgating, axis=2),(t.shape[0], t.shape[1], 1))
	tgating=T.reshape(tgating,(t.shape[0],t.shape[1],ntgates,1))
	
	mult=z*tgating
	
	out=T.sum(mult,axis=2)
	
	#out=out+x
	
	return T.cast(out,'float32')
Example #31
def _log_dot_tensor(x, z):
    log_dot = x.dimshuffle(1, 'x', 0) + z
    max_ = log_dot.max(axis=0)
    out = (T.log(T.sum(T.exp(log_dot - max_[None, :, :]), axis=0)) + max_)
    out = out.T
    return T.switch(T.isnan(out), -numpy.inf, out)
Example #32
 def htovMB(self, HsampM):
     """
     computes visible unit outputs given hidden unit inputs ("half" an MCMC iteration)
     computes in parallel given input rows of hidden units
    
     args:
     HsampM (T.matrix): rows of hidden unit inputs
     
     returns:
     a T.matrix, rows of visible unit outputs
     
     """
     
     T_omgH = T.matrix(name="T_omgH", dtype=theano.config.floatX)
     T_means = T.matrix(name="T_means", dtype=theano.config.floatX)
     htovMBres = T.matrix(name="htovMBres", dtype=theano.config.floatX)
     
     T_omgH = T.transpose(T.dot(self.T_omega, T.transpose(HsampM)))
     T_means = T.fill(T_omgH, self.T_a) + T_omgH
     htovMBres = self.T_rng.normal(size=T_means.shape, avg=T_means, std=T.fill(T_means,T.sqrt(T.exp(self.T_z))), dtype=theano.config.floatX)
     return htovMBres
Example #33
    def __init__(self, noOfVisibleUnits, noOfHiddenUnits, CD_n, aRate, bRate, omegaRate, sigmaRate, omega=None, b=None, a=None, z=None, rprop_e = 0.01, rprop_en =0.005, sparseTargetp=0.01):
        '''
        constructor
        RBMrv_T(self, noOfVisibleUnits, noOfHiddenUnits, CD_n, aRate, bRate, omegaRate, sigmaRate, omega=None, b=None, a=None, z=None, rprop_e = 0.01, rprop_en =0.005, sparseTargetp=0.01):
        
        noOfVisibleUnits (int):         must be perfect square
        noOfHiddenUnits (int):          must be perfect square
        CD_n (int):                     no. of iterations in MCMC simulation during training, check if model means are used if CD_n = 1
        aRate (float32):                update rate of parameter \underline{a} during training
        bRate (float32):                update rate of parameter \underline{b} during training
        omegaRate (float32):            update rate of parameter \boldsymbol{\omega} during training
        sigmaRate (float32):            update rate of parameter \underline{z} during training
        omega (numpy array of float32): \omega parameter matrix with noOfVisible unit rows x noOfHiddenUnits columns  
        b (numpy array of float32):     b parameter vector, size = noOfHiddenUnits
        a (numpy array of float32):     a parameter vector, size = noOfVisibleUnits
        z (numpy array of float32):     z parameter vector, size = noOfVisibleUnits
        rprop_e (float32):              
        rprop_en (float32):             
        sparseTargetp (float32):        target mean hidden unit activation for training. between (0,1)
        
        '''
        
        self.epsilon = 0.0000001

        theano.config.exception_verbosity = 'high'
        #rprop parameters and variables, rprop not used 
        self.T_rprop_e = theano.shared(value=np.float32(rprop_e), name='T_rprop_e', borrow = True, allow_downcast=True)
        self.T_rprop_en = theano.shared(value=np.float32(rprop_en), name='T_rprop_en', borrow = True, allow_downcast=True)
        self.T_posUpdate = theano.shared(value=np.float32(0.5*(1.0+rprop_e)), name='T_posUpdate', borrow = True, allow_downcast=True)
        self.T_negUpdate = theano.shared(value=np.float32(0.5*(1.0-rprop_en)), name='T_negUpdate', borrow = True, allow_downcast=True)
        
        #network geometry and training parameters
        self.miniBatchSize = 0 #will be set in self.trainMB(...)
        self.parameterLoaded = False
        self.parameterSaved = False
        self.sparseTargetp = sparseTargetp
        self.CD_n = CD_n
        self.nv = noOfVisibleUnits
        self.nh = noOfHiddenUnits
        self.dimV = int(math.sqrt(self.nv))
        self.dimH = int(math.sqrt(self.nh))
        self.aRate = np.float32(aRate)
        self.bRate = np.float32(bRate)
        self.omegaRate = np.float32(omegaRate)
        self.sigmaRate = np.float32(sigmaRate)
        #initialise v and h 
        self.v = np.float32(np.random.uniform(0, 1.0, self.nv))
        self.h = np.float32(np.random.binomial(1.0,0.5,self.nh))
        self.logLikelihood = []
        self.likelihood4plot = []
        
        
        self.T_aRate = theano.shared(value=np.float32(aRate), name='T_aRate', borrow = True, allow_downcast=True)
        self.T_bRate = theano.shared(value=np.float32(bRate), name='T_bRate', borrow = True, allow_downcast=True)
        self.T_omgRate = theano.shared(value=np.float32(omegaRate), name='T_omgRate', borrow = True, allow_downcast = True)
        self.T_sigRate = theano.shared(value=np.float32(sigmaRate), name='T_sigRate', borrow = True, allow_downcast = True)
        
        self.loadedRates = [aRate, bRate, omegaRate, sigmaRate]  # for load/saveparameters(); can load to see previous rates, but these differ from the constructor-declared rates
   
        self.T_rng = RandomStreams() #use_cuda parameter set if on GPU
        #successive calls on this T_rng will keep returning new values, so for MCMC even with the
        #same start v vector value called twice consecutively you'll get different outputs.
        #This is normal: the same T_rng gets called, without reset, giving different outputs every time.
        
        self.T_CD_n = theano.shared(value=CD_n, name='T_CD_n', borrow = True, allow_downcast=True)
              
        if omega is None: #careful! use "1.0" instead of "1" below else it all rounds to zeros!!!
            omega = np.float32(np.random.uniform((-1.0)*(1.0/(np.sqrt(self.nh+self.nv))),(1.0/(np.sqrt(self.nh+self.nv))),self.nv*self.nh).reshape((self.nv,self.nh)))
        self.omega = omega
        self.T_omega = theano.shared(value=omega,name='T_omega',borrow=True, allow_downcast=True)
        #rprop previous gradient
        self.Tomg_grad_prev = theano.shared(value=np.float32(np.abs(omega*omegaRate)+omegaRate), name='Tomg_grad_prev', borrow = True, allow_downcast=True)
        #RMSprop accumulated gradient RMS
        self.Tomg_rmsH = theano.shared(value=omega,name='Tomg_rmsH', borrow=True, allow_downcast=True)
        
        if b is None:
            b = np.float32(np.random.uniform((-1.0)*(1.0/(self.nv)),(1.0/(self.nv)),self.nh))
        self.b = b
        self.T_b = theano.shared(value=b,name='T_b',borrow=True, allow_downcast=True)
        #rprop previous gradient
        self.Tb_grad_prev = theano.shared(value=np.float32(np.abs(bRate*b)+bRate), name='Tb_grad_prev', borrow = True, allow_downcast=True)
        #RMSprop accumulated gradient RMS
        self.Tb_rmsH = theano.shared(value = b, name = 'Tb_rmsH', borrow = True, allow_downcast = True)
        
        if a is None:
            a = np.float32(np.random.uniform((-1.0)*(1.0/(self.nh)),(1.0/(self.nh)),self.nv))
        self.a = a
        self.T_a = theano.shared(value=a,name='T_a',borrow=True, allow_downcast=True)
        #rprop previous gradient
        self.Ta_grad_prev = theano.shared(value=np.float32(np.abs(aRate*a)+aRate), name='Ta_grad_prev', borrow = True, allow_downcast=True)
        #RMSprop accumulated gradient RMS
        self.Ta_rms = theano.shared(value=a, name='Ta_rms', borrow=True, allow_downcast=True)
        
        # for sigma parameter we train z instead with e^z = \sigma^2
        if z is None:
            z = np.float32(np.random.normal(0.0,(1.0/(self.nh*self.nh)),self.nv))#np.asarray([0.0]*self.nv, dtype=theano.config.floatX)
        self.z = z
        self.T_z = theano.shared(value=z,name='T_z',borrow=True, allow_downcast=True) 
        self.T_sigmaSqr = T.exp(self.T_z)
        #rprop previous gradient
        self.Tz_grad_prev = theano.shared(value=np.float32(np.float32(np.abs(z*sigmaRate)+sigmaRate)), name='Tz_grad_prev', borrow = True, allow_downcast=True)
        #RMSprop accumulated gradient RMS
        self.Tz_rmsH = theano.shared(value=z, name = 'Tz_rmsH', borrow=True, allow_downcast=True)
               
        self.T_logZk = theano.shared(value = np.float32(0.0), name = 'T_logZk', borrow=True, allow_downcast=True)

        #will print in ipython notebook:
        print("RBMrv constructed for " + str(len(self.v)) + " visible units and " + str(len(self.h)) + " hidden units.")
Example #34
from theano import function, config, shared, sandbox
import theano.tensor as T
import numpy
import time

vlen = 10 * 30 * 768  # 10 x #cores x # threads per core
iters = 1000

rng = numpy.random.RandomState(22)
x = shared(numpy.asarray(rng.rand(vlen), config.floatX))
f = function([], T.exp(x))
print(f.maker.fgraph.toposort())
t0 = time.time()
for i in range(iters):
    r = f()
t1 = time.time()
print('Looping %d times took' % iters, t1 - t0, 'seconds')
print('Result is', r)
if numpy.any([isinstance(x.op, T.Elemwise)
              for x in f.maker.fgraph.toposort()]):
    print('Used the cpu')
else:
    print('Used the gpu')
Example #35
def discountModel(alpha, length):
    """
    discount model
    """
    return tensor.exp(alpha * length * (-1))
Example #36
def GESD(sum_uni_l, sum_uni_r):
    eucli = 1 / (1 + T.sum((sum_uni_l - sum_uni_r)**2))
    kernel = 1 / (1 + T.exp(-(T.dot(sum_uni_l, sum_uni_r.T) + 1)))
    return (eucli * kernel).reshape((1, 1))
Example #37
def RBF(sum_uni_l, sum_uni_r):
    eucli = T.sum((sum_uni_l - sum_uni_r)**2)
    return T.exp(-0.5 * eucli).reshape((1, 1))
Example #38
def _log_add(a, b):
    # TODO: move functions like this to utils
    max_ = tensor.maximum(a, b)
    result = (max_ + tensor.log1p(tensor.exp(a + b - 2 * max_)))
    return T.switch(T.isnan(result), max_, result)
Example #39
def laplace(x, mean, logvar):
    sd = T.exp(0.5 * logvar)
    return - abs(x - mean) / sd - 0.5 * logvar - np.log(2)
Example #40
def _log_dot_matrix(x, z):
    y = x[:, :, None] + z[None, :, :]
    y_max = y.max(axis=1)
    out = T.log(T.sum(T.exp(y - y_max[:, None, :]), axis=1)) + y_max
    return T.switch(T.isnan(out), -numpy.inf, out)
Example #41
def softmax(X):
    e_x = T.exp(X - X.max(axis=1).dimshuffle(0, 'x'))
    return e_x / e_x.sum(axis=1).dimshuffle(0, 'x')
Example #42
def log_sum_exp(x):
    m = T.max(x, axis=0)
    return T.log(T.sum(T.exp(x - m))) + m
Example #43
    beta = T.minimum(
        1,
        T.cast(total_iters, theano.config.floatX) / lib.floatX(BETA_ITERS))
    return T.nnet.relu(logsig, alpha=beta)


# Layer 1

mu_and_logsig1 = Enc1(images)
mu1, logsig1 = split(mu_and_logsig1)

if VANILLA:
    latents1 = mu1
else:
    eps = T.cast(theano_srng.normal(mu1.shape), theano.config.floatX)
    latents1 = mu1 + (eps * T.exp(logsig1))

outputs1 = Dec1(latents1, images)

reconst_cost = T.nnet.categorical_crossentropy(
    T.nnet.softmax(outputs1.reshape((-1, 256))), images.flatten()).mean()

# Layer 2

mu_and_logsig2 = Enc2(latents1)
mu2, logsig2 = split(mu_and_logsig2)

if VANILLA:
    latents2 = mu2
else:
    eps = T.cast(theano_srng.normal(mu2.shape), theano.config.floatX)
Example #44
def normal2(x, mean, logvar):
    # `c` is assumed to be a module-level constant defined elsewhere, presumably c = -0.5 * np.log(2 * np.pi)
    return c - logvar / 2 - (x - mean) ** 2 / (2 * T.exp(logvar))
Example #45
# initialize the weight vector w randomly
w = theano.shared(rng.randn(feats), name="w")

# this and the following bias variable b
# are shared so they keep their values
# between training iterations (updates)

# initialize the bias term
b = theano.shared(0., name="b")

print("Initial model:")
print(w.get_value())
print(b.get_value())

# Construct Theano expression graph
p_1 = 1 / (1 + T.exp(-T.dot(x, w) - b))  # Probability that target = 1
prediction = p_1 > 0.5  # The prediction thresholded
xent = -y * T.log(p_1) - (1 - y) * T.log(
    1 - p_1)  # Cross-entropy loss function
cost = xent.mean() + 0.01 * (w**2).sum()  # The cost to minimize
gw, gb = T.grad(cost, [w, b])  # Compute the gradient of the cost

rmse = ((y - p_1)**2).mean()  # note: despite the name, no square root is taken (this is the MSE)

# w.r.t weight vector w and
# bias term b
# (we shall return to this in a
# following section of this tutorial)

# Compile
train = theano.function(inputs=[x, y],
                        outputs=[prediction, xent],
                        updates=((w, w - 0.1 * gw), (b, b - 0.1 * gb)))  # gradient-descent updates; continuation assumed from the standard Theano tutorial
Example #46
 def expr_generator(a, b):
     ra = [T.pow(a[i], i) for i in range(len(a))]
     return ra, T.exp(b)
Example #47
 def standard_prob(self, x, p=None):
     if p is None:
         p = self.get_prob(*self.get_params())
     return T.exp(-self.neg_log_prob(x, p))
Example #48
#! /usr/bin/env python3
# Taken from http://deeplearning.net/software/theano/tutorial/using_gpu.html
from theano import function, config, shared, tensor
import numpy
import time

vlen = 10 * 30 * 768  # 10 x #cores x # threads per core
iters = 1000

rng = numpy.random.RandomState(22)
x = shared(numpy.asarray(rng.rand(vlen), config.floatX))
f = function([], tensor.exp(x))
t0 = time.time()
for i in range(iters):
    r = f()
t1 = time.time()
print("Looping %d times took %f seconds" % (iters, t1 - t0))
print("Result is %s" % (r, ))
if numpy.any([
        isinstance(x.op, tensor.Elemwise)
        and ("Gpu" not in type(x.op).__name__)
        for x in f.maker.fgraph.toposort()
]):
    print("Used the CPU")
else:
    print("Used the GPU")
Example #49
    def _step(
            m_,
            x_,
            xx_,
            h_,
            ctx_,
            alpha1_,
            alpha2_,  # These ctx and alpha's are not used in the computations
            pctx1_,
            pctx2_,
            cc1_,
            cc2_,
            U,
            Wc,
            W_comb_att,
            W_comb_att2,
            U_att,
            c_att,
            Ux,
            Wcx,
            U_nl,
            Ux_nl,
            b_nl,
            bx_nl):

        # Do a step of classical GRU
        h1 = gru_step(m_, x_, xx_, h_, U, Ux)

        ###########
        # Attention
        ###########
        # h1 X W_comb_att
        # W_comb_att: dim -> dimctx
        # pstate_ should be 2D as we're working with unrolled timesteps
        pstate1_ = tensor.dot(h1, W_comb_att)
        pstate2_ = tensor.dot(h1, W_comb_att2)

        # Accumulate in pctx*__ and apply tanh()
        # This becomes the projected context(s) + the current hidden state
        # of the decoder, e.g. this is the information accumulating
        # into the returned original contexts with the knowledge of target
        # sentence decoding.
        pctx1__ = tanh(pctx1_ + pstate1_[None, :, :])
        pctx2__ = tanh(pctx2_ + pstate2_[None, :, :])

        # Affine transformation for alpha* = (pctx*__ X U_att) + c_att
        # We're now down to scalar alpha's for each accumulated
        # context (0th dim) in the pctx*__
        # alpha1 should be n_timesteps, 1, 1
        alpha1 = tensor.dot(pctx1__, U_att) + c_att
        alpha2 = tensor.dot(pctx2__, U_att) + c_att

        # Drop the last dimension, e.g. (n_timesteps, 1)
        alpha1 = alpha1.reshape([alpha1.shape[0], alpha1.shape[1]])
        alpha2 = alpha2.reshape([alpha2.shape[0], alpha2.shape[1]])

        # Exponentiate alpha1
        alpha1 = tensor.exp(alpha1 - alpha1.max(0, keepdims=True))
        alpha2 = tensor.exp(alpha2 - alpha2.max(0, keepdims=True))

        # If there is a context mask, multiply with it to cancel unnecessary steps
        # We won't have a ctx_mask for image vectors
        if ctx1_mask:
            alpha1 = alpha1 * ctx1_mask

        # Normalize so that the sum makes 1
        alpha1 = alpha1 / alpha1.sum(0, keepdims=True)
        alpha2 = alpha2 / alpha2.sum(0, keepdims=True)

        # Compute the current context ctx*_ as the alpha-weighted sum of
        # the initial contexts ctx*'s
        ctx1_ = (cc1_ * alpha1[:, :, None]).sum(0)
        ctx2_ = (cc2_ * alpha2[:, :, None]).sum(0)
        # n_samples x ctxdim (2000)

        # Sum of contexts
        ctx_ = tanh(ctx1_ + ctx2_)

        ############################################
        # ctx*_ and alpha computations are completed
        ############################################

        ####################################
        # The below code is another GRU cell
        ####################################
        # Affine transformation: h1 X U_nl + b_nl
        # U_nl, b_nl: Stacked dim*2
        preact = tensor.dot(h1, U_nl) + b_nl

        # Transform the weighted context sum with Wc
        # and add it to preact
        # Wc: dimctx -> Stacked dim*2
        preact += tensor.dot(ctx_, Wc)

        # Apply sigmoid nonlinearity
        preact = sigmoid(preact)

        # Slice activations: New gates r2 and u2
        r2 = tensor_slice(preact, 0, dim)
        u2 = tensor_slice(preact, 1, dim)

        preactx = (tensor.dot(h1, Ux_nl) + bx_nl) * r2
        preactx += tensor.dot(ctx_, Wcx)

        # Candidate hidden
        h2_tilda = tanh(preactx)

        # Leaky integration between the new h2 and the
        # old h1 computed in line 285
        h2 = u2 * h2_tilda + (1. - u2) * h1
        h2 = m_[:, None] * h2 + (1. - m_)[:, None] * h1

        return h2, ctx_, alpha1.T, alpha2.T
Example #50
def _softmax(x):
    axis = x.ndim - 1
    e_x = T.exp(x - x.max(axis=axis, keepdims=True))
    out = e_x / e_x.sum(axis=axis, keepdims=True)
    return out
Example #51
def log_softmax(x):
    xdev = x - x.max(1, keepdims=True)
    return xdev - T.log(T.sum(T.exp(xdev), axis=1, keepdims=True))
Example #52
 def step_sample(self, epsilon, p):
     dim = p.shape[p.ndim - 1] // self.scale
     mu = _slice(p, 0, dim)
     log_sigma = _slice(p, 1, dim)
     return mu + epsilon * T.exp(log_sigma)
Example #53
import theano
import theano.tensor as T
import numpy
import random

x = T.vector()
w = theano.shared(numpy.array([-1.,1.]))
b = theano.shared(0.)

z = T.dot(w,x) + b
y = 1 / (1 + T.exp(-z))

neuron = theano.function(
			inputs = [x],
			outputs = y)

y_hat = T.scalar()  # reference variable for the output
cost = T.sum((y - y_hat) ** 2)  # cost function

dw, db = T.grad(cost,[w,b])  # gradient with respect to w and b

gradient = theano.function(   # function to compute the gradients
			inputs = [x,y_hat],
			outputs = [dw,db])

x = [1, -1]
y_hat = 1

for i in range(100):
	print neuron(x)
	dw, db = gradient(x, y_hat)
	# assumed gradient-descent step (step size 0.1 is illustrative); without an update the loop never changes w or b
	w.set_value(w.get_value() - 0.1 * dw)
	b.set_value(b.get_value() - 0.1 * db)
Example #54
    def _step_slice(m_, x_, xx_, yg, h_, ctx_, alpha_, alpha_past_, beta, pctx_, cc_,
                    U, Wc, W_comb_att, U_att, c_tt, Ux, Wcx, U_nl, Ux_nl, b_nl, bx_nl, conv_Q, conv_Uf, conv_b, 
                    Whg, bhg, Umg, W_m_att, U_when_att, c_when_att):
        preact1 = tensor.dot(h_, U)
        preact1 += x_
        preact1 = tensor.nnet.sigmoid(preact1)

        r1 = _slice(preact1, 0, dim) # reset gate
        u1 = _slice(preact1, 1, dim) # update gate

        preactx1 = tensor.dot(h_, Ux)
        preactx1 *= r1
        preactx1 += xx_

        h1 = tensor.tanh(preactx1)

        h1 = u1 * h_ + (1. - u1) * h1
        h1 = m_[:, None] * h1 + (1. - m_)[:, None] * h_
        
        g_m = tensor.dot(h_, Whg) + bhg
        g_m += yg
        g_m = tensor.nnet.sigmoid(g_m)
        mt = tensor.dot(h1, Umg)
        mt = tensor.tanh(mt)
        mt *= g_m
        # attention
        pstate_ = tensor.dot(h1, W_comb_att)

        # coverage vector
        cover_F = theano.tensor.nnet.conv2d(alpha_past_[:,None,:,None],conv_Q,border_mode='half') # batch x dim x SeqL x 1
        cover_F = cover_F.dimshuffle(1,2,0,3) # dim x SeqL x batch x 1
        cover_F = cover_F.reshape([cover_F.shape[0],cover_F.shape[1],cover_F.shape[2]])
        assert cover_F.ndim == 3, \
            'Output of conv must be 3-d: #dim x SeqL x batch'
        #cover_F = cover_F[:,pad:-pad,:]
        cover_F = cover_F.dimshuffle(1, 2, 0)
        # cover_F must be SeqL x batch x dimctx
        cover_vector = tensor.dot(cover_F, conv_Uf) + conv_b
        # cover_vector = cover_vector * context_mask[:,:,None]

        pctx__ = pctx_ + pstate_[None, :, :] + cover_vector
        #pctx__ += xc_
        pctx__ = tensor.tanh(pctx__)
        alpha = tensor.dot(pctx__, U_att)+c_tt
        # compute alpha_when
        
        pctx_when = tensor.dot(mt, W_m_att)
        pctx_when += pstate_
        pctx_when = tensor.tanh(pctx_when)
        alpha_when = tensor.dot(pctx_when, U_when_att)+c_when_att # batch * 1
        
        alpha = alpha.reshape([alpha.shape[0], alpha.shape[1]]) # SeqL * batch
        alpha = tensor.exp(alpha)
        alpha_when = tensor.exp(alpha_when)
        if context_mask:
            alpha = alpha * context_mask
        if context_mask:
            alpha_mean = alpha.sum(0, keepdims=True) / context_mask.sum(0, keepdims=True)
        else:
            alpha_mean = alpha.mean(0, keepdims=True)
        alpha_when = concatenate([alpha_mean, alpha_when.T], axis=0) # (SeqL+1)*batch
        alpha = alpha / alpha.sum(0, keepdims=True)
        alpha_when = alpha_when / alpha_when.sum(0, keepdims=True)
        beta = alpha_when[-1, :]
        alpha_past = alpha_past_ + alpha.T
        ctx_ = (cc_ * alpha[:, :, None]).sum(0)  # current context
        ctx_ = beta[:, None] * mt + (1. - beta)[:, None] * ctx_

        preact2 = tensor.dot(h1, U_nl)+b_nl
        preact2 += tensor.dot(ctx_, Wc)
        preact2 = tensor.nnet.sigmoid(preact2)

        r2 = _slice(preact2, 0, dim)
        u2 = _slice(preact2, 1, dim)

        preactx2 = tensor.dot(h1, Ux_nl)+bx_nl
        preactx2 *= r2
        preactx2 += tensor.dot(ctx_, Wcx)

        h2 = tensor.tanh(preactx2)

        h2 = u2 * h1 + (1. - u2) * h2
        h2 = m_[:, None] * h2 + (1. - m_)[:, None] * h1

        return h2, ctx_, alpha.T, alpha_past, beta  # pstate_, preact, preactx, r, u
Example #55
    def kmLossFunction(self, vMax, rnaConc, kDeg, isEndoRnase, alpha):
        '''
		Generates the functions used for estimating the per-RNA affinities (Michaelis-Menten
		constants) to the endoRNAses.

		The optimization problem is formulated as a multidimensional root-finding problem; the goal
		is to find a set of Michaelis-Menten constants such that the endoRNAse-mediated degradation
		under basal concentrations is consistent with the experimentally observed half-lives, thus

		(nonlinear rate) = (linear rate)

		where the nonlinear rate is the rate as predicted from some kinetic rate law, and the
		linear rate is proportional to the inverse of the observed half-life.  Then, reordering,

		0 = (nonlinear rate) - (linear rate)

		is (for the moment) the root we wish to find, for each RNA species, giving us the
		multidimensional function

		R_aux = (nonlinear rate) - (linear rate)

		This is the unnormalized residual function; the normalized residuals are

		R = (nonlinear rate)/(linear rate) - 1

		In addition to matching our half-lives we also desire the Michaelis-Menten constants to be
		non-negative (negative values have no physical meaning).  Thus we introduce a penalty term
		for negative values.  TODO (John): explain penalty term

		The two terms (the residuals R and the negative value penalty Rneg) are combined into one
		'loss' function L (alpha is the weighting on the negative value penalty):

		L = ln((exp(R) + exp(alpha*Rneg))/2)
		  = ln(exp(R) + exp(alpha*Rneg)) - ln(2)

		The loss function has one element for each RNA.  This functional form is a soft
		(continuous and differentiable) approximation to

		L = max(R, alpha*Rneg)

		The root finder, provided with L, will attempt to make each element of L as close to zero
		as possible, and therefore minimize both R and Rneg.

		The third-party package Theano is used to create the functions and find an analytic
		expression for the Jacobian.

		Parameters
		----------
		vMax: scalar
			The total endoRNAse capacity, in dimensions of amount per volume per time.
		rnaConc: 1-D array, float
			Concentrations of RNAs (that will be degraded), in dimensions of amount per volume.
		kDeg: 1-D array, float
			Experimentally observed degradation rates (computed from half-lives), in dimensions of
			per unit time.
		isEndoRnase: 1-D array, bool
			A vector that is True everywhere that an RNA corresponds to an endoRNAse; that is, an
			endoRNAse (or endoRNAse subunit) mRNA.
		alpha: scalar, >0
			Regularization weight, used to penalize for negative Michaelis-Menten value predictions
			during the course of the optimization.  Typical value is 0.5.

		Returns
		-------
		L: function
			The 'loss' function.
		Rneg: function
			The negative Michaelis-Menten constant penalty terms.
		R: function
			The residual error (deviation from steady-state).
		Lp: function
			The Jacobian of the loss function L with respect to the Michaelis-Menten constants.
		R_aux: function
			Unnormalized 'residual' function.
		L_aux: function
			Unnormalized 'loss' function.
		Lp_aux: function
			Jacobian of the unnormalized 'loss' function.
		Jacob: function
			Duplicate of Lp.
		Jacob_aux: function
			Duplicate of Lp_aux.

		Notes
		-----
		The regularization term also includes a penalty for the endoRNAse residuals, as well as a
		fixed weighting (WFendoR = 0.1).
		TODO (John): Why is this needed?  It seems redundant.
		TODO (John): How do we know this weight is sufficient?

		All of the outputs are Theano functions, and take a 1-D array of Michaelis-Menten constants
		as their sole inputs.  All of the functions return a 1-D array, with the exception of the
		Jacobians, which return matrices.

		TODO (John): Remove the redundant outputs.

		TODO (John): Look into removing Theano, since it is no longer maintained.  We could use
		another package with similar functionality (analytic differentiation on algebraic
		functions), or replace the Theano operations with hand-computed solutions (difficult, as
		the Jacobian is probably very complicated).

		TODO (John): Consider redesigning this as an objective minimization problem rather than a
		root finding problem.

		TODO (John): Consider replacing the Michaelis-Menten constants with logarithmic
		equivalents, thereby eliminating the requirement for the negative value penalty.

		TODO (John): Consider moving this method out of this class, as it is, in fact, a static
		method, and isn't utilized anywhere within this class.
		'''

        N = rnaConc.size
        km = T.dvector()

        # Residuals of non-linear optimization
        residual = (vMax / km / kDeg) / (1 + (rnaConc / km).sum()) - np.ones(N)
        residual_aux = (vMax * rnaConc / km) / (1 + (rnaConc / km).sum()) - (
            kDeg * rnaConc)

        # Counting negative Km's (first regularization term)
        regularizationNegativeNumbers = (np.ones(N) -
                                         km / np.abs(km)).sum() / N

        # Penalties for the endoRNase Km's, which might potentially not be fitted
        regularizationEndoR = (isEndoRnase * np.abs(residual)).sum()

        # Multi objective-based regularization
        WFendoR = 0.1  # weighting factor protecting the optimized Km's of the endoRNases
        regularization = regularizationNegativeNumbers + (WFendoR *
                                                          regularizationEndoR)

        # Loss function
        LossFunction = T.log(T.exp(residual) +
                             T.exp(alpha * regularization)) - T.log(2)
        LossFunction_aux = T.log(
            T.exp(residual_aux) + T.exp(alpha * regularization)) - T.log(2)

        J = theano.gradient.jacobian(LossFunction, km)
        J_aux = theano.gradient.jacobian(LossFunction_aux, km)
        Jacob = theano.function([km], J)
        Jacob_aux = theano.function([km], J_aux)
        L = theano.function([km], LossFunction)
        L_aux = theano.function([km], LossFunction_aux)
        Rneg = theano.function([km], regularizationNegativeNumbers)
        R = theano.function([km], residual)
        Lp = theano.function([km], J)
        Lp_aux = theano.function([km], J_aux)
        R_aux = theano.function([km], residual_aux)

        return L, Rneg, R, Lp, R_aux, L_aux, Lp_aux, Jacob, Jacob_aux
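A minimal NumPy/SciPy sketch of the same loss on toy inputs may help orient the reader; every value below is made up for illustration, and the Theano-generated Jacobian is omitted (SciPy approximates it numerically):

import numpy as np
from scipy.optimize import root

# Toy inputs (illustrative only; not taken from the model above)
vMax, alpha, WFendoR = 50.0, 0.5, 0.1
rnaConc = np.array([2.0, 1.0, 0.5])
kDeg = np.array([0.10, 0.20, 0.05])
isEndoRnase = np.array([False, True, False])

def loss(km):
    # Normalized residual R = (nonlinear rate)/(linear rate) - 1
    R = (vMax / km / kDeg) / (1. + (rnaConc / km).sum()) - 1.
    # Penalty for negative Km's plus the weighted endoRNase residual penalty
    Rneg = (1. - km / np.abs(km)).sum() / km.size
    regularization = Rneg + WFendoR * (isEndoRnase * np.abs(R)).sum()
    # Soft maximum of the two terms, as described in the docstring above
    return np.log(np.exp(R) + np.exp(alpha * regularization)) - np.log(2.)

kmFit = root(loss, x0=np.full(rnaConc.size, 10.0)).x  # fitted Km's, one per RNA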
Ejemplo n.º 56
0
 def gaussian(self, freq, numax, w, A):
     return A * tt.exp(-0.5 * tt.sqr((freq - numax)) / tt.sqr(w))
Ejemplo n.º 57
0
def mixture_model_mobile_centers(
        data_2d,
        N,  # noqa: N803
        M,
        std,
        lam_backg,
        nsteps,
        nchains
):
    """Define the mixture model and sample from it.

    This mobile-centers model extends the mixture model above in that it allows
    the center position of each atom to vary slightly from the center of its
    lattice site, which should help in cases of lattice inhomogeneity.

    Parameters
    ----------
    data_2d : ndarray of floats
        2D intensity distribution of the collected light
    N : integer
        number of lattice sites along one axis
    M : integer
        number of pixels per lattice site along one axis
    std : float
        Gaussian width of the point spread function
    lam_backg : integer
        Expected value of the Poissonian background
    nsteps : integer
        number of steps taken by each walker in the pymc3 sampling
    nchains : integer
        number of walkers in the pymc3 sampling

    Returns
    -------
    traces : pymc3 MultiTrace
        An object that contains the samples.
    df : pandas.DataFrame
        The samples converted into a pandas DataFrame.

    """
    # x-pixel locations for the entire image
    x = np.arange(0, N*M)
    # X, Y meshgrid of pixel locations
    X, Y = np.meshgrid(x, x)  # noqa: N806

    # atom center locations are explicitly supplied as the centers of
    # the lattice sites
    centers = np.linspace(0, (N-1)*M, N)+M/2
    Xcent_mu, Ycent_mu = np.meshgrid(centers, centers)  # noqa: N806

    with pm.Model() as mixture_model:  # noqa: F841

        # Priors

        # continuous numbers characterizing if lattice sites are filled
        # or not.
        q = pm.Uniform('q', lower=0, upper=1, shape=(N, N))

        # Allow centers to move but we expect them to be
        # pretty near their lattice centers
        Xcent = pm.Normal(  # noqa: N806
            'Xcent',
            mu=Xcent_mu,
            sigma=Xcent_mu/10,
            shape=(N, N)
        )

        Ycent = pm.Normal(  # noqa: N806
            'Ycent',
            mu=Ycent_mu,
            sigma=Ycent_mu/10,
            shape=(N, N)
        )

        # Amplitude of the Gaussian signal for the atoms
        aa = pm.Gamma('Aa', mu=3, sd=0.5)
        # Amplitude of the uniform background signal
        ab = pm.Gamma('Ab', mu=0.5, sd=0.1)

        # Width of the Gaussian likelihood for the atoms
        sigma_a = pm.Gamma('sigma_a', mu=1, sd=0.1)

        # Width of the Gaussian likelihood for the background
        sigma_b = pm.Gamma('sigma_b', mu=1, sd=0.1)

        # Width of the point spread function
        atom_std = pm.Gamma('std', mu=std, sd=0.1)

        # Instead of tiling a single-atom PSF with a Kronecker product, use
        # broadcasting and sum along the lattice axes so that one atom's
        # signal can spill over into neighboring sites.
        atom = tt.sum(
                tt.sum(
                    q*aa * tt.exp(
                        -((X[:, :, None, None] - Xcent)**2 +
                          (Y[:, :, None, None] - Ycent)**2) / (2 * atom_std**2)
                    ),
                    axis=2
                ),
                axis=2
            )
        atom += ab

        # background is just flat
        background = ab*np.ones((N*M, N*M))
        # Log-likelihood
        good_data = pm.Normal.dist(mu=atom, sd=sigma_a).logp(data_2d)
        bad_data = pm.Normal.dist(mu=background, sd=sigma_b).logp(data_2d)
        log_like = good_data + bad_data

        pm.Potential('logp', log_like.sum())

        # Sample
        traces = pm.sample(tune=nsteps, draws=nsteps, chains=nchains)

    # convert the PymC3 traces into a dataframe
    df = pm.trace_to_dataframe(traces)

    return traces, df
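A hypothetical invocation on synthetic data, purely to illustrate the call signature; the image, lattice dimensions, and sampler settings below are invented and not taken from any experiment:

import numpy as np

# Synthetic stand-in for the collected-light image (illustrative values only)
N, M = 4, 10                                   # 4x4 lattice, 10 pixels per site
rng = np.random.default_rng(0)
fake_image = rng.poisson(lam=1.0, size=(N * M, N * M)).astype(float)

traces, df = mixture_model_mobile_centers(
    data_2d=fake_image,
    N=N,
    M=M,
    std=1.5,          # assumed point-spread-function width, in pixels
    lam_backg=1,      # assumed expected Poissonian background
    nsteps=500,       # tuning and sampling steps per walker
    nchains=2,        # number of walkers
)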
Ejemplo n.º 58
0
    def define_layers(self):
        self.params = []

        layer_id = "1"
        self.W_xh = init_weights((self.in_size, self.hidden_size),
                                 self.prefix + "W_xh" + layer_id)
        self.b_xh = init_bias(self.hidden_size,
                              self.prefix + "b_xh" + layer_id)

        layer_id = "2"
        self.W_hu = init_weights((self.hidden_size, self.latent_size),
                                 self.prefix + "W_hu" + layer_id)
        self.b_hu = init_bias(self.latent_size,
                              self.prefix + "b_hu" + layer_id)
        self.W_hsigma = init_weights((self.hidden_size, self.latent_size),
                                     self.prefix + "W_hsigma" + layer_id)
        self.b_hsigma = init_bias(self.latent_size,
                                  self.prefix + "b_hsigma" + layer_id)

        layer_id = "3"
        self.W_zh = init_weights((self.latent_size, self.hidden_size),
                                 self.prefix + "W_zh" + layer_id)
        self.b_zh = init_bias(self.hidden_size,
                              self.prefix + "b_zh" + layer_id)

        self.params += [self.W_xh, self.b_xh, self.W_hu, self.b_hu, self.W_hsigma, self.b_hsigma, \
                        self.W_zh, self.b_zh]

        layer_id = "4"
        if self.continuous:
            self.W_hyu = init_weights((self.hidden_size, self.out_size),
                                      self.prefix + "W_hyu" + layer_id)
            self.b_hyu = init_bias(self.out_size,
                                   self.prefix + "b_hyu" + layer_id)
            self.W_hysigma = init_weights((self.hidden_size, self.out_size),
                                          self.prefix + "W_hysigma" + layer_id)
            self.b_hysigma = init_bias(self.out_size,
                                       self.prefix + "b_hysigma" + layer_id)
            self.params += [
                self.W_hyu, self.b_hyu, self.W_hysigma, self.b_hysigma
            ]
        else:
            self.W_hy = init_weights((self.hidden_size, self.out_size),
                                     self.prefix + "W_hy" + layer_id)
            self.b_hy = init_bias(self.out_size,
                                  self.prefix + "b_hy" + layer_id)
            self.params += [self.W_hy, self.b_hy]

        # encoder
        h_enc = T.nnet.relu(T.dot(self.X, self.W_xh) + self.b_xh)

        self.mu = T.dot(h_enc, self.W_hu) + self.b_hu
        log_var = T.dot(h_enc, self.W_hsigma) + self.b_hsigma
        self.var = T.exp(log_var)
        self.sigma = T.sqrt(self.var)

        # Reparameterization trick: z = mu + sigma * eps, with eps ~ N(0, I)
        srng = T.shared_randomstreams.RandomStreams(234)
        eps = srng.normal(self.mu.shape)
        self.z = self.mu + self.sigma * eps

        # decoder
        h_dec = T.nnet.relu(T.dot(self.z, self.W_zh) + self.b_zh)
        if self.continuous:
            self.reconstruct = T.dot(h_dec, self.W_hyu) + self.b_hyu
            self.log_var_dec = T.dot(h_dec, self.W_hysigma) + self.b_hysigma
            self.var_dec = T.exp(self.log_var_dec)
        else:
            self.reconstruct = T.nnet.sigmoid(
                T.dot(h_dec, self.W_hy) + self.b_hy)
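The encoder outputs above (self.mu, log_var) and the decoder reconstruction are the ingredients of the usual VAE objective. The class's own cost function is not shown here, so the following is only a sketch of the standard negative ELBO for the non-continuous (Bernoulli) case, with names chosen to match the fragment:

import theano.tensor as T

def vae_cost(X, reconstruct, mu, log_var):
    # Reconstruction term: binary cross-entropy between input and decoder output
    recon = T.sum(T.nnet.binary_crossentropy(reconstruct, X), axis=1)
    # KL divergence between q(z|x) = N(mu, exp(log_var)) and the unit Gaussian prior
    kl = -0.5 * T.sum(1. + log_var - T.sqr(mu) - T.exp(log_var), axis=1)
    return T.mean(recon + kl)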
Ejemplo n.º 59
0
 def f(t, x, u):  # builds up the soft "collision" penalty terms for all objects
     ret = 0.
     for i, (a, b) in enumerate(bounds):
         ret += -tt.exp((u[i] - b) / width) - tt.exp((a - u[i]) / width)
     return ret
Ejemplo n.º 60
0
 def free_energy(self, V):
     return -V.dot(self.b) - T.sum(T.log(1 + T.exp(V.dot(self.W) + self.c)),
                                   axis=1)
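This is the standard restricted Boltzmann machine free energy, F(V) = -V·b - sum_j log(1 + exp(V·W_j + c_j)). In contrastive-divergence training it is typically combined into a cost over a data batch and the end of a Gibbs chain; the sketch below assumes such a setup, since the chain construction is not shown above:

import theano.tensor as T

# Sketch only: `rbm` is assumed to expose free_energy() as defined above,
# `V_data` is a batch of visible vectors, and `V_sample` is the end of a
# Gibbs chain (already treated as a constant with respect to the gradient).
def cd_cost(rbm, V_data, V_sample):
    # Contrastive divergence pushes free energy down on data, up on model samples
    return T.mean(rbm.free_energy(V_data)) - T.mean(rbm.free_energy(V_sample))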