Example #1
    def __init__(self, n_in):
        # symbolic inputs: design matrix X and target vector y
        self.X = theanoTensor.matrix('X', dtype=theano.config.floatX)
        self.y = theanoTensor.vector('y', dtype=theano.config.floatX)

        # model parameters: weight vector W and scalar bias b
        self.W = theano.shared(name='W',
                               value=np.ones((n_in, ), dtype=theano.config.floatX),
                               borrow=True)

        self.b = theano.shared(name='b',
                               value=np.cast[theano.config.floatX](0.0),
                               borrow=True)

        # linear model, mean squared error and R^2 of the predictions
        y_pred = theanoTensor.dot(self.X, self.W) + self.b
        self.MSe = theanoTensor.mean(theanoTensor.pow(y_pred - self.y, 2))
        self.MSy = theanoTensor.mean(theanoTensor.pow(self.y, 2))
        self.R2 = 1 - (self.MSe / self.MSy)

        # plain gradient-descent updates for W and b
        paramList = [self.W, self.b]
        grad_wrtParams = theanoTensor.grad(self.getMSE(), wrt=paramList)
        learning_rate = 1e-3
        updates = [(p, p - learning_rate * g) for p, g in zip(paramList, grad_wrtParams)]

        self.train_model = theano.function(
            inputs=[self.X, self.y],
            outputs=[self.getMSE()],
            updates=updates
        )

        self.test_model = theano.function(
            inputs=[self.X, self.y],
            outputs=[self.getR2()],
        )
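A hedged usage sketch for the class above. Assumptions: the class is named LinearRegression here purely for illustration, and its getMSE()/getR2() methods (not shown in the snippet) simply return self.MSe and self.R2.

import numpy as np
import theano

model = LinearRegression(n_in=3)
X = np.random.randn(100, 3).astype(theano.config.floatX)
y = X.dot(np.array([1.0, -2.0, 0.5], dtype=theano.config.floatX))

for _ in range(200):
    mse, = model.train_model(X, y)   # one full-batch gradient step on W and b
r2, = model.test_model(X, y)
print(mse, r2)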
Example #2
    def __init__(self,inputData,image_shape):
        self.input=inputData
        num_out=image_shape[1]
        epsilon=0.01
        self.image_shape=image_shape

        gamma_values = numpy.ones((num_out,), dtype=theano.config.floatX)
        self.gamma_vals = theano.shared(value=gamma_values, borrow=True)

        beta_values = numpy.zeros((num_out,), dtype=theano.config.floatX)
        self.beta_vals = theano.shared(value=beta_values, borrow=True)

        batch_mean=T.mean(self.input,keepdims=True,axis=(0,2,3))
        batch_var=T.var(self.input,keepdims=True,axis=(0,2,3))+epsilon

        self.batch_mean=self.adjustVals(batch_mean)
        batch_var=self.adjustVals(batch_var)
        self.batch_var=T.pow(batch_var,0.5)   # standard deviation of the batch

        # normalise by the batch standard deviation (self.batch_var is already the square root)
        batch_normalize=(inputData-self.batch_mean)/self.batch_var

        self.beta = self.beta_vals.dimshuffle('x', 0, 'x', 'x')
        self.gamma = self.gamma_vals.dimshuffle('x', 0, 'x', 'x')

        self.output=batch_normalize*self.gamma+self.beta
        #self.output=inputData-self.batch_mean

        self.params=[self.gamma_vals,self.beta_vals]
Example #3
  def __init__(self, config, loss, params):
    self._lr = get_shared_floatX(config.learning_rate, 'lr')
    self._t = get_shared_floatX(1, 't')
    self._all_m_tm1 = []
    self._all_v_tm1 = []
    self._updates = [(self._t, self._t + 1)]

    if config.lr_decay:
      lr_coef = tt.pow(config.lr_decay, (self._t - 1) // config.lr_decay_freq)
      self._updates.append((self._lr, lr_coef * config.learning_rate))

    grads = theano.grad(loss, params)

    self._global_grad_norm = tt.sqrt(tt.sum(tt.stack([tt.sum(g**2.) for g in grads])))
    if config.max_grad_norm:
      global_clip_factor = ifelse(tt.lt(self._global_grad_norm, config.max_grad_norm),
        cast_floatX_np(1.),
        cast_floatX(config.max_grad_norm/self._global_grad_norm))
      grads = [global_clip_factor * g for g in grads]

    lr_t = self._lr * \
      clip_sqrt(1 - tt.pow(config.adam_beta2, self._t)) / (1 - tt.pow(config.adam_beta1, self._t))

    for p, g in zip(params, grads):
        m_tm1 = get_shared_floatX(np.zeros_like(p.get_value()), 'adam_m_' + p.name)
        v_tm1 = get_shared_floatX(np.zeros_like(p.get_value()), 'adam_v_' + p.name)
        self._all_m_tm1.append(m_tm1)
        self._all_v_tm1.append(v_tm1)
        m_t = config.adam_beta1 * m_tm1 + (1-config.adam_beta1) * g
        v_t = config.adam_beta2 * v_tm1 + (1-config.adam_beta2) * tt.sqr(g)
        delta_t = -lr_t * m_t / (clip_sqrt(v_t) + config.adam_eps)
        p_t = p + delta_t
        self._updates += [(m_tm1, m_t), (v_tm1, v_t), (p, p_t)]
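A hedged sketch of the small helpers the optimizer above relies on but does not define (get_shared_floatX, cast_floatX, cast_floatX_np, clip_sqrt). These are plausible stand-ins consistent with how they are used, not the project's actual code.

import numpy as np
import theano
import theano.tensor as tt

def cast_floatX_np(x):
    # numpy-side cast to the configured float width
    return np.asarray(x, dtype=theano.config.floatX)

def cast_floatX(x):
    # symbolic cast to the configured float width
    return tt.cast(x, theano.config.floatX)

def get_shared_floatX(value, name):
    # shared variable holding a floatX copy of `value`
    return theano.shared(cast_floatX_np(value), name=name)

def clip_sqrt(x):
    # square root guarded against small negative values caused by rounding
    return tt.sqrt(tt.maximum(x, 0.))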
Example #4
    def _model_setup(self):
        with self._model:
            # COSMOLOGY


            omega_m = pm.Uniform("OmegaM", lower=0, upper=1.)

            # dark energy EOS
            w = pm.Normal("w", mu=-1, sd=1)

            # My custom distance mod. function to enable
            # ADVI and HMC sampling.

            dm = distmod_w_flat(omega_m, self._h0, w, self._zcmb)

            # PHILLIPS PARAMETERS

            # M0 is the location parameter for the distribution
            # sys_scat is the scale parameter for the M0 distribution
            # rather than "unexpalined variance"
            M0 = pm.Normal("M0", mu=-19.3, sd=2.)
            sys_scat = pm.HalfCauchy('sys_scat', beta=2.5)  # Gelman recommendation for variance parameter
            M_true = pm.Normal('M_true', M0, sys_scat, shape=self._n_SN)

            # following Rubin's Unity model... best idea? not sure
            taninv_alpha = pm.Uniform("taninv_alpha", lower=-.2, upper=.3)
            taninv_beta = pm.Uniform("taninv_beta", lower=-1.4, upper=1.4)

            # Transform variables
            alpha = pm.Deterministic('alpha', T.tan(taninv_alpha))
            beta = pm.Deterministic('beta', T.tan(taninv_beta))

            # Again using Rubin's Unity model.
            # After discussion with Rubin, the idea is that
            # these parameters are ideally sampled from a Gaussian,
            # but we know they are not entirely correct. So instead,
            # the Cauchy is less informative around the mean, while
            # still having informative tails.

            xm = pm.Cauchy('xm', alpha=0, beta=1)
            cm = pm.Cauchy('cm', alpha=0, beta=1)

            Rx_log = pm.Uniform('Rx_log', lower=-0.5, upper=0.5)
            Rc_log = pm.Uniform('Rc_log', lower=-1.5, upper=1.5)

            # Transformed variables
            Rx = pm.Deterministic("Rx", T.pow(10., Rx_log))
            Rc = pm.Deterministic("Rc", T.pow(10., Rc_log))

            x_true = pm.Normal('x_true', mu=xm, sd=Rx, shape=self._n_SN)
            c_true = pm.Normal('c_true', mu=cm, sd=Rc, shape=self._n_SN)

            # Do the correction
            mb = pm.Deterministic("mb", M_true + dm - alpha * x_true + beta * c_true)

            # Likelihood and measurement error

            obsc = pm.Normal("obsc", mu=c_true, sd=self._dcolor, observed=self._color)
            obsx = pm.Normal("obsx", mu=x_true, sd=self._dx1, observed=self._x1)
            obsm = pm.Normal("obsm", mu=mb, sd=self._dmb_obs, observed=self._mb_obs)
Example #5
    def get_testing_function(self, test_data, test_mask, pct_blackout=0.5):
        raise NotImplementedError("fix me!")

        i, batch_size = T.iscalars('i', 'batch_size')
        self.test_noise = T.shared_randomstreams.RandomStreams(1234).binomial(
                            (self.inputs.shape), n=1, p=1-pct_blackout, 
                            dtype=theano.config.floatX)
        self.test_noisy = self.test_noise * self.inputs
        self.test_active_hidden = T.nnet.sigmoid(T.dot(self.test_noisy, self.W) + self.b_in)
        self.test_output = T.nnet.sigmoid(T.dot(self.test_active_hidden, self.W.T) + self.b_out)

        # root mean squared error of unknowns only

        # Take the original input vector's mask of which beers had no rating and mask out any
        # predicted ratings in those dimensions, so the error is not affected by dimensions that
        # carried no meaningful information in the original input.
        # only_originally_unknown = (mask of the items withheld from the network, so only the
        # non-inputted answers are scored) . (inputs_mask . full output vector)
        self.only_originally_unknown = T.dot(1-self.test_noise, T.dot(self.inputs_mask, self.test_output))
        self.test_error = T.pow(T.mean(T.pow(T.dot(self.inputs_mask, self.test_output) - self.inputs, 2)), 0.5)

        self.testing_function = theano.function([i, batch_size], self.test_error, 
                                                givens={self.inputs:        test_data[i:i+batch_size],
                                                        self.inputs_mask:   test_mask[i:i+batch_size]})

        return self.testing_function
Example #6
def test_0():

    N = 16*1000*10*1

    if 1:
        aval = abs(numpy.random.randn(N).astype('float32'))+.1
        bval = numpy.random.randn(N).astype('float32')
        a = T.fvector()
        b = T.fvector()
    else:
        aval = abs(numpy.random.randn(N))+.1
        bval = numpy.random.randn(N)
        a = T.dvector()
        b = T.dvector()

    f = theano.function([a,b], T.pow(a,b), mode='LAZY')
    theano_opencl.elemwise.swap_impls=False
    g = theano.function([a,b], T.pow(a,b), mode='LAZY')

    print('ocl   time', timeit.Timer(lambda: f(aval, bval)).repeat(3, 3))

    print('gcc   time', timeit.Timer(lambda: g(aval, bval)).repeat(3, 3))

    print('numpy time', timeit.Timer(lambda: aval**bval).repeat(3, 3))

    assert ((f(aval, bval) - aval**bval)**2).sum() < 1.1
    assert ((g(aval, bval) - aval**bval)**2).sum() < 1.1
Example #7
	def _step(self,xg_t, xo_t, xc_t, mask_tm1,h_tm1, c_tm1, u_g, u_o, u_c):

		h_mask_tm1 = mask_tm1 * h_tm1
		c_mask_tm1 = mask_tm1 * c_tm1
		act = T.tensordot( xg_t + h_mask_tm1, u_g , [[1],[2]])
		gate = T.nnet.softmax(act.reshape((-1, act.shape[-1]))).reshape(act.shape)

		c_tilda = self.activation(xc_t + T.dot(h_mask_tm1, u_c))

		sigma_se = self.k_parameters[0]
		sigma_per = self.k_parameters[1]
		sigma_b_lin = self.k_parameters[2]
		sigma_v_lin = self.k_parameters[3]
		sigma_rq = self.k_parameters[4]

		l_se = self.k_parameters[5]
		l_per = self.k_parameters[6]
		l_lin = self.k_parameters[7]
		l_rq = self.k_parameters[8]

		alpha_rq = self.k_parameters[9]
		p_per = self.k_parameters[10]

		k_se = T.pow(sigma_se,2) * T.exp( -T.pow(c_mask_tm1 - c_tilda,2) / (2* T.pow(l_se,2) + self.EPS))
		k_per = T.pow(sigma_per,2) * T.exp( -2*T.pow(T.sin( math.pi*(c_mask_tm1 - c_tilda)/ (p_per + self.EPS) ),2)	 / ( T.pow(l_per,2) + self.EPS ))
		k_lin = T.pow(sigma_b_lin,2) + T.pow(sigma_v_lin,2)	 * (c_mask_tm1 - l_lin) * (c_tilda - l_lin )
		k_rq = T.pow(sigma_rq,2) * T.pow( 1 + T.pow( (c_mask_tm1 - c_tilda),2)	/ ( 2 * alpha_rq * T.pow(l_rq,2) + self.EPS), -alpha_rq)

		ops = [c_mask_tm1,c_tilda,k_se, k_per, k_lin,k_rq]
		yshuff = T.as_tensor_variable( ops, name='yshuff').dimshuffle(1,2,0)
		c_t = (gate.reshape((-1,gate.shape[-1])) * yshuff.reshape((-1,yshuff.shape[-1]))).sum(axis = 1).reshape(gate.shape[:2])
		o_t = self.inner_activation(xo_t + T.dot(h_mask_tm1, u_o))
		h_t = o_t * self.activation(c_t)
		return h_t, c_t
Example #8
    def finetune_cost_updates(self, center, mu, learning_rate):
        """ This function computes the cost and the updates ."""

        # note : we sum over the size of a datapoint; if we are using
        #        minibatches, L will be a vector, with one entry per
        #        example in minibatch
        network_output = self.get_output()
        temp = T.pow(center - network_output, 2)    
        
        L =  T.sum(temp, axis=1) 
        # Add the network reconstruction error 
        z = self.get_network_reconst()
        reconst_err = T.sum(T.pow(self.x - z, 2), axis = 1)            
        L = self.beta*L + self.lbd*reconst_err
        
        cost1 = T.mean(L)
        cost2 = self.lbd*T.mean(reconst_err)  
        cost3 = cost1 - cost2

        # compute the gradients of the cost of the `dA` with respect
        # to its parameters
        gparams = T.grad(cost1, self.params)  
        # generate the list of updates
        updates = []
        grad_values = []
        param_norm = []
        for param, delta, gparam in zip(self.params, self.delta, gparams):
            updates.append( (delta, mu*delta - learning_rate * gparam) )
            updates.append( (param, param + mu*mu*delta - (1+mu)*learning_rate*gparam ))
            grad_values.append(gparam.norm(L=2))
            param_norm.append(param.norm(L=2))
        
        grad_ = T.stack(*grad_values)
        param_ = T.stack(*param_norm)
        return ((cost1, cost2, cost3, grad_, param_), updates)
Example #9
    def lp_norm(self, n, k, r, c, z):
        '''
        Lp = ( 1/n * sum(|x_i|^p, 1..n))^(1/p) where p = 1 + ln(1+e^P)
        :param n: batch index
        :param k: feature-map (channel) index
        :param r: row index of the pooled output
        :param c: column index of the pooled output
        :param z: output tensor the pooled value is written into
        :return: z with z[n, k, r, c] set to the Lp-pooled value
        '''
        ds0, ds1 = self.pool_size
        st0, st1 = self.stride
        pad_h = self.pad[0]
        pad_w = self.pad[1]

        row_st = r * st0
        row_end = T.minimum(row_st + ds0, self.img_rows)
        row_st = T.maximum(row_st, self.pad[0])
        row_end = T.minimum(row_end, self.x_m2d + pad_h)

        col_st = c * st1
        col_end = T.minimum(col_st + ds1, self.img_cols)
        col_st = T.maximum(col_st, self.pad[1])
        col_end = T.minimum(col_end, self.x_m1d + pad_w)

        Lp = T.pow(
                T.mean(T.pow(
                        T.abs_(T.flatten(self.y[n, k, row_st:row_end, col_st:col_end], 1)),
                        1 + T.log(1 + T.exp(self.P))
                )),
                1 / (1 + T.log(1 + T.exp(self.P)))
        )

        return T.set_subtensor(z[n, k, r, c], Lp)
Example #10
 def __call__(self, loss):
     loss += self.l1 * T.sum(T.mean(abs(self.layer.get_output(True)), axis=0))
     loss += self.l2 * T.sum(T.mean(self.layer.get_output(True) ** 2, axis=0))
     loss += self.l_bin * T.sum(
         T.mean(T.pow(self.layer.get_output(True), self.k) * T.pow(1 - self.layer.get_output(True), self.k), axis=0)
     )
     return loss
Example #11
 def get_reg_ind(self):
     drop_ax, drop_ay = T.pow(T.exp(self.params[-2]), 2), T.pow(T.exp(self.params[-1]), 2)
     constant = np.cast[theano.config.floatX](.5 * np.log(self.noise_lvl) + c1 * self.noise_lvl + c2 * (self.noise_lvl**2) + c3 * (self.noise_lvl**3))
     reg_indx = .5 * T.log(drop_ax) + c1 * drop_ax + c2 * T.pow(drop_ax, 2) + c3 * T.pow(drop_ax, 3) - constant
     reg_indy = .5 * T.log(drop_ay) + c1 * drop_ay + c2 * T.pow(drop_ay, 2) + c3 * T.pow(drop_ay, 3) - constant
     reg_ind = T.cast(T.prod(self.params[3].shape), theano.config.floatX) * reg_indx + T.cast(T.prod(self.params[4].shape), theano.config.floatX) * reg_indy
     return reg_ind
Example #12
def get_box_mask_se(a,b):
    '''
    return (batch_size, grid_num, box_num, 1) tensor as mask
    '''
    se = T.pow(T.pow(a-b, 2).sum(axis=-1), .5)
    sem = se.min(axis=-1, keepdims=True) # find the box with lowest square error
    se_mask = T.eq(se, sem).reshape((a.shape[0], a.shape[1], a.shape[2], 1))
    return se_mask
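A minimal usage sketch, assuming T is theano.tensor and that a and b are 4-D box tensors of shape (batch_size, grid_num, box_num, coords).

import numpy as np
import theano
import theano.tensor as T

a, b = T.tensor4('a'), T.tensor4('b')
mask_fn = theano.function([a, b], get_box_mask_se(a, b))
pred = np.random.rand(2, 7, 3, 4).astype(theano.config.floatX)
truth = np.random.rand(2, 7, 3, 4).astype(theano.config.floatX)
print(mask_fn(pred, truth).shape)   # (2, 7, 3, 1); a 1 marks the closest box in each grid cell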
Example #13
def gamma_params(mode=10., sd=10.):
    '''
    Converts mode and sd to the shape and rate of a gamma distribution.
    '''
    var = Tns.pow(sd, 2)
    rate = (mode + Tns.pow(Tns.pow(mode, 2) + 4 * var, 0.5)) / (2 * var)
    shape = 1 + mode * rate
    return shape, rate
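A minimal usage sketch, assuming Tns is theano.tensor, so the returned shape and rate are symbolic scalars that can be evaluated directly or passed to a PyMC3 Gamma prior.

shape, rate = gamma_params(mode=10., sd=10.)
print(shape.eval(), rate.eval())   # concrete shape/rate for a Gamma with mode 10 and sd 10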
Example #14
def objective(x):
    """
    objective function
    @param x: input vector
    @return: value of objective function
    """
    z = x - objective.offset
    return T.sum(T.pow(z, 4) - 16 * T.pow(z, 2) + 5 * z, axis=1) / 2
Example #15
    def evolve(self, x, n, k, gamma):
        """ Compute time-derivative at current state

        Model: dx/dt = x^n / (x^n + K^n) - gamma*x
        This leads to single-species bistability.
        """
        dxdt = T.pow(x, n)/(T.pow(x, n)+T.pow(k,n)) - gamma*x
        return dxdt
Example #16
def get_input_vectors(shape, phases, scaling, offset):
    x = T.repeat(offset[0] + T.arange(shape[0]) / scaling, shape[1] * phases).reshape(
        (shape[0], shape[1], phases)) * T.pow(2, T.arange(phases))
    y = T.repeat(T.tile(offset[1] + T.arange(shape[1]) / scaling, shape[0]).reshape(
        (shape[0], shape[1], 1)), phases, axis=2) * T.pow(2, T.arange(phases))
    z = T.tile(offset[2] + 10 * T.arange(phases), shape[0] * shape[1]).reshape((shape[0], shape[1], phases, 1))
    x = x.reshape((shape[0], shape[1], phases, 1))
    y = y.reshape((shape[0], shape[1], phases, 1))
    return T.concatenate([x, y, z], axis=3).reshape((shape[0] * shape[1] * phases, 3)).astype('float32')
Example #17
 def get_reg_ind(self):
     nsl = self.noise_lvl**2
     constant = .5 * np.log(nsl) + c1 * nsl + c2 * (nsl**2) + c3 * (nsl**3)
     stdx, stdy = self._get_stds()
     drop_ax, drop_ay = T.pow(stdx, 2), T.pow(stdy, 2)
     reg_indx = .5 * T.log(drop_ax) + c1 * drop_ax + c2 * T.pow(drop_ax, 2) + c3 * T.pow(drop_ax, 3) - constant
     reg_indy = .5 * T.log(drop_ay) + c1 * drop_ay + c2 * T.pow(drop_ay, 2) + c3 * T.pow(drop_ay, 3) - constant
     reg_ind = T.sum(reg_indx) + T.sum(reg_indy)
     return reg_ind
Example #18
 def _loopoverallballallbatch(self, ballid):
     ox=self.middle[:,(ballid)*3].reshape((self.batchsize,1))
     x=T.tile(ox,(1,self.height*self.width)).reshape((self.batchsize,self.height,self.width))
     oy=self.middle[:,(ballid)*3+1].reshape((self.batchsize,1))
     y=T.tile(oy,(1,self.height*self.width)).reshape((self.batchsize,self.height,self.width))
     w=T.tile(T.tile(T.arange(0,self.width),(self.height,)),(self.batchsize,)).reshape((self.batchsize,self.height,self.width))
     h=T.tile(T.tile(T.arange(0,self.height).reshape((self.height,1)),(1,self.width)),(self.batchsize,1)).reshape((self.batchsize,self.height,self.width))
     multiply=T.tile(self.middle[:,(ballid)*3+2].reshape((self.batchsize,1)),(1,self.height*self.width)).reshape((self.batchsize,self.height,self.width))
     results=multiply*T.exp((T.pow(x-w,2)+T.pow(y-h,2))*(-1.0/self.sigma))
     return results
Example #19
    def init_fun_(self, dim_state, batch_size, gamma, learning_rate, momentum, reward_scaling, reward_scaling_update):
        """Define and compile function to train and evaluate network
        :param net: Lasagne output layer
        :param dim_state: dimensions of a single state tensor
        :param batch_size:
        :param gamma: future reward discount factor
        :param learning_rate:
        :param momentum:
        :param reward_scaling:
        :param reward_scaling_update:
        :return:
        """
        if len(dim_state) != 3:
            raise ValueError("We only support 3 dimensional states.")

        # inputs
        old_states, new_states = T.tensor4s('old_states', 'new_states')   # (BATCH_SIZE, MEMORY_LENGTH, DIM_STATE[0], DIM_STATE[1])
        actions = T.ivector('actions')           # (BATCH_SIZE, 1)
        rewards = T.vector('rewards')            # (BATCH_SIZE, 1)
        rs = shared(value=reward_scaling*1.0, name='reward_scaling')

        # intermediates
        predict_q = lasagne.layers.get_output(layer_or_layers=self.qnn, inputs=old_states)
        predict_next_q = lasagne.layers.get_output(layer_or_layers=self.qnn_target, inputs=new_states)
        target_q = rewards/rs + gamma*T.max(predict_next_q, axis=1)

        # penalty
        singularity = 1+1e-3
        penalty = T.mean(
            1/T.pow(predict_q[T.arange(batch_size), actions]-singularity, 2) +
            1/T.pow(predict_q[T.arange(batch_size), actions]+singularity, 2) - 2)


        # outputs
        loss = T.mean((predict_q[T.arange(batch_size), actions] - target_q)**2) + (1e-5)*penalty

        # weight update formulas (mini-batch SGD with momentum)
        params = lasagne.layers.get_all_params(self.qnn, trainable=True)
        updates = lasagne.updates.nesterov_momentum(loss, params, learning_rate=learning_rate, momentum=momentum)
        updates_rs = lasagne.updates.nesterov_momentum(loss, [rs], learning_rate=learning_rate, momentum=momentum)

        # functions
        fun_train_qnn = theano.function([old_states, actions, rewards, new_states], loss, updates=updates, allow_input_downcast=True)
        fun_adapt_rs = theano.function([old_states, actions, rewards, new_states], loss, updates=updates_rs, allow_input_downcast=True)

        def fun_clone_target():
            lasagne.layers.helper.set_all_param_values(
                self.qnn_target,
                lasagne.layers.helper.get_all_param_values(self.qnn)
            )

        fun_q_lookup = theano.function([old_states], predict_q, allow_input_downcast=True)
        fun_rs_lookup = rs.get_value

        return fun_train_qnn, fun_adapt_rs, fun_clone_target, fun_q_lookup, fun_rs_lookup
Example #20
def integrand_w_flat(z, Om, w):
    """

    :param z: redshift
    :param Om: matter content
    :param w: DE EOS
    :return: theano array of 1/H(z)
    """
    zp = 1 + z
    Ode = 1 - Om - Or  # dark-energy density, assuming a flat universe
    return T.power((T.pow(zp, 3) * (Or * zp + Om) + Ode * T.pow(zp, 3.0 * (1 + w))), -0.5)
Example #21
 def _loopoverallball(self, ballid,batchid):
     ox=self.middle[batchid][ballid*2].reshape((1,1))
     print("ox:", ox.ndim)
     x=T.tile(ox,(self.height,self.width))
     oy=self.middle[batchid][ballid*2+1].reshape((1,1))
     y=T.tile(oy,(self.height,self.width))
     w=T.tile(T.arange(0,self.width),(self.height,)).reshape((self.height,self.width))
     h=T.tile(T.arange(0,self.height).reshape((self.height,1)),(1,self.width))
     cof=(T.pow(x-w,2)+T.pow(y-h,2))*(-1.0/self.sigma)        
     print(T.exp(cof).ndim)
     return T.exp(cof)
Example #22
    def evolve_system(self, x, n, k, gamma):
        """ Compute time-derivative at current state

        Model: dx/dt = k^n / (x^n + K^n) - gamma*x
        This leads to 3+ species sustained oscillations. Note that x is matrix.

        We have dependency only on preceding variable, which can be efficiently implemented
        by rolling the matrix by `shift=-1` along corresponding axis.
        """
        temp = T.pow(k, n)/(T.pow(x, n)+T.pow(k,n))
        dxdt = T.roll(temp, shift = -1, axis = 1) - gamma*x
        return dxdt
Example #23
    def __init__(self, w_list, x_list, p, q, r, k, params, updates, eps=1.0e-6):
        """Transcription of equation 2.1 from paper (page 1434).
        """
        if len(w_list) != len(x_list):
            raise ValueError('w_list must have same len as x_list')
        output = (sum(w * tensor.pow(x, p) for (w, x) in zip(w_list, x_list)))\
                / (theano._asarray(eps, dtype=k.type.dtype) + k + tensor.pow(sum(tensor.pow(x, q) for x in x_list), r))

        assert output.type.ndim == 2
        self.__dict__.update(locals())
        del self.__dict__['self']
        _logger.debug('output dtype %s' % output.dtype)
Example #24
def get_garch_model():
    r = np.array([28, 8, -3, 7, -1, 1, 18, 12], dtype=np.float64)
    sigma1 = np.array([15, 10, 16, 11, 9, 11, 10, 18], dtype=np.float64)
    alpha0 = np.array([10, 10, 16, 8, 9, 11, 12, 18], dtype=np.float64)
    shape = r.shape

    with Model() as garch:
        alpha1 = Uniform('alpha1', 0., 1., shape=shape)
        beta1 = Uniform('beta1', 0., 1 - alpha1, shape=shape)
        mu = Normal('mu', mu=0., sd=100., shape=shape)
        theta = tt.sqrt(alpha0 + alpha1 * tt.pow(r - mu, 2) +
                        beta1 * tt.pow(sigma1, 2))
        Normal('obs', mu, sd=theta, observed=r)
    return garch
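A hedged usage sketch, assuming `sample` is imported from pymc3 alongside the Model, Uniform and Normal names used above.

with get_garch_model():
    trace = sample(500, tune=500)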
Example #25
 def __init__(self, target, initial_phi, profile_s=None, A0=1.0):
     self.target = target
     self.n_pixels = int(target.shape[0] / 2)   # target should be 512x512, but SLM pattern calculated should be 256x256.
     self.intensity_calc = None
     
     self.cost = None   # placeholder for cost function.
     
     if profile_s is None:
         profile_s = np.ones((self.n_pixels, self.n_pixels))
         
     assert profile_s.shape == (self.n_pixels, self.n_pixels), 'profile_s is wrong shape, should be ({n},{n})'.format(n=self.n_pixels)
     self.profile_s_r = profile_s.real.astype('float64')
     self.profile_s_i = profile_s.imag.astype('float64')
     
     assert initial_phi.shape == (self.n_pixels**2,), "initial_phi must be a vector of phases of size N^2 (not (N,N)).  Shape is " + str(initial_phi.shape)
     
     self.A0 = A0
     
     # Set zeros matrix:
     self.zero_frame = np.zeros((2*self.n_pixels, 2*self.n_pixels), dtype='float64')
     
     # Phi and its momentum for use in gradient descent with momentum:
     self.phi    = theano.shared(value=initial_phi.astype('float64'),
                                 name='phi')
     self.phi_rate = theano.shared(value=np.zeros_like(initial_phi).astype('float64'),
                                   name='phi_rate')
     
     self.S_r = theano.shared(value=self.profile_s_r,
                              name='s_r')
     self.S_i = theano.shared(value=self.profile_s_i,
                              name='s_i')
     self.zero_matrix = theano.shared(value=self.zero_frame,
                                      name='zero_matrix')
     
     # E_in: (n_pixels**2)
     phi_reshaped = self.phi.reshape((self.n_pixels, self.n_pixels))
     self.E_in_r = self.A0 * (self.S_r*T.cos(phi_reshaped) - self.S_i*T.sin(phi_reshaped))
     self.E_in_i = self.A0 * (self.S_i*T.cos(phi_reshaped) + self.S_r*T.sin(phi_reshaped))
     
     # E_in padded: (4n_pixels**2)
     idx_0, idx_1 = get_centre_range(self.n_pixels)
     self.E_in_r_pad = T.set_subtensor(self.zero_matrix[idx_0:idx_1,idx_0:idx_1], self.E_in_r)
     self.E_in_i_pad = T.set_subtensor(self.zero_matrix[idx_0:idx_1,idx_0:idx_1], self.E_in_i)
     
     # E_out:
     self.E_out_r, self.E_out_i = fft(self.E_in_r_pad, self.E_in_i_pad)        
     
     # finally, the output intensity:
     self.E_out_2 = T.add(T.pow(self.E_out_r, 2), T.pow(self.E_out_i, 2))
Example #26
def get_garch_model():
    r = np.array([28, 8, -3, 7, -1, 1, 18, 12])
    sigma1 = np.array([15, 10, 16, 11, 9, 11, 10, 18])
    alpha0 = np.array([10, 10, 16, 8, 9, 11, 12, 18])
    shape = r.shape

    with Model() as garch:
        alpha1 = Normal('alpha1', mu=0., sd=1., shape=shape)
        BoundedNormal = Bound(Normal, upper=(1 - alpha1))
        beta1 = BoundedNormal('beta1', mu=0., sd=1e6, shape=shape)
        mu = Normal('mu', mu=0., sd=1e6, shape=shape)
        theta = tt.sqrt(alpha0 + alpha1 * tt.pow(r - mu, 2) +
                        beta1 * tt.pow(sigma1, 2))
        Normal('obs', mu, sd=theta, observed=r)
    return garch
Example #27
File: ops.py Project: errord/Theano
def spectral_radius_bound(X, log2_exponent):
    """
    Returns an upper bound on the largest eigenvalue of a square symmetric matrix X.

    log2_exponent must be a positive-valued integer. The larger it is, the
    slower and tighter the bound.  Values up to 5 should usually suffice.  The
    algorithm works by multiplying X by itself this many times.

    From V.Pan, 1990. "Estimating the Extremal Eigenvalues of a Symmetric
    Matrix", Computers Math Applic. Vol 20 n. 2 pp 17-22.
    Note: an efficient algorithm, not used here, is defined in this paper.
    """
    if X.type.ndim != 2:
        raise TypeError('spectral_radius_bound requires a matrix argument', X)
    if not isinstance(log2_exponent, int):
        raise TypeError('spectral_radius_bound requires an integer exponent',
                        log2_exponent)
    if log2_exponent <= 0:
        raise ValueError('spectral_radius_bound requires a strictly positive '
                         'exponent', log2_exponent)
    XX = X
    for i in range(log2_exponent):
        XX = tensor.dot(XX, XX)
    return tensor.pow(
            trace(XX),
            2 ** (-log2_exponent))
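A minimal usage sketch, assuming `tensor` is theano.tensor and `trace` comes from theano.tensor.nlinalg, matching how the function above uses them.

import numpy as np
import theano
import theano.tensor as tensor
from theano.tensor.nlinalg import trace   # assumed source of the `trace` used above

W = tensor.matrix('W')
X = tensor.dot(W, W.T)                                  # square symmetric matrix
bound_fn = theano.function([W], spectral_radius_bound(X, 4))
print(bound_fn(np.random.randn(5, 5).astype(theano.config.floatX)))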
Example #28
  def get_updates(self,
                  learning_rate,
                  corruption_level=None,
                  L1_rate=0.000,
                  L2_rate=0.000):

    if corruption_level is not None:
      x = self.get_corruption_input(self.input, corruption_level)
      y = self.decode_layer.get_output(self.encode_layer.get_output(x))
    else:
      y = self.decode_layer.out_feature_maps

    # squared reconstruction error, summed over each example
    cost = T.sum(T.pow(T.sub(self.decode_layer.out_feature_maps, self.feature_maps), 2), axis=1)

    # cost = self.get_cost(self.feature_maps, y)
    # L2 weight decay on the encoder and decoder filters
    cost += 0.001 * ((self.encode_layer.filters**2).sum() + (self.decode_layer.filters**2).sum())
    cost = T.mean(cost)

    params = self.encode_layer.params + self.decode_layer.params
    gparams = T.grad(cost, params)

    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, gparams)]

    return cost, updates
Example #29
 def cost(self):
   """
   :param y: shape (time*batch,) -> label
   :return: error scalar, known_grads dict
   """
   y_f = T.cast(T.reshape(self.y_data_flat, (self.y_data_flat.shape[0] * self.y_data_flat.shape[1]), ndim = 1), 'int32')
   known_grads = None
   if self.loss == 'sprint':
     if not isinstance(self.sprint_opts, dict):
       import json
       self.sprint_opts = json.loads(self.sprint_opts)
     assert isinstance(self.sprint_opts, dict), "you need to specify sprint_opts in the output layer"
     if self.exp_normalize:
       log_probs = T.log(self.p_y_given_x)
     else:
       log_probs = self.z
     sprint_error_op = SprintErrorSigOp(self.attrs.get("target", "classes"), self.sprint_opts)
     err, grad = sprint_error_op(log_probs, T.sum(self.index, axis=0))
     err = err.sum()
     if self.loss_like_ce:
       y_ref = T.clip(self.p_y_given_x - grad, numpy.float32(0), numpy.float32(1))
       err = -T.sum(T.log(T.pow(self.p_y_given_x, y_ref)) * T.cast(self.index, "float32").dimshuffle(0, 1, 'x'))
     if self.ce_smoothing:
       err *= numpy.float32(1.0 - self.ce_smoothing)
       grad *= numpy.float32(1.0 - self.ce_smoothing)
       if not self.prior_scale:  # we kept the softmax bias as it was
         nll, pcx = T.nnet.crossentropy_softmax_1hot(x=self.y_m[self.i], y_idx=self.y_data_flat[self.i])
       else:  # assume that we have subtracted the bias by the log priors beforehand
         assert self.log_prior is not None
         # In this case, for the CE calculation, we need to add the log priors again.
         y_m_prior = T.reshape(self.z + numpy.float32(self.prior_scale) * self.log_prior,
                               (self.z.shape[0] * self.z.shape[1], self.z.shape[2]), ndim=2)
         nll, pcx = T.nnet.crossentropy_softmax_1hot(x=y_m_prior[self.i], y_idx=self.y_data_flat[self.i])
       ce = numpy.float32(self.ce_smoothing) * T.sum(nll)
       err += ce
       grad += T.grad(ce, self.z)
     known_grads = {self.z: grad}
     return err, known_grads
   elif self.loss == 'ctc':
     from theano.tensor.extra_ops import cpu_contiguous
     err, grad, priors = CTCOp()(self.p_y_given_x, cpu_contiguous(self.y.dimshuffle(1, 0)), self.index_for_ctc())
     known_grads = {self.z: grad}
     return err.sum(), known_grads, priors.sum(axis=0)
   elif self.loss == 'ce_ctc':
     y_m = T.reshape(self.z, (self.z.shape[0] * self.z.shape[1], self.z.shape[2]), ndim=2)
     p_y_given_x = T.nnet.softmax(y_m)
     #pcx = p_y_given_x[(self.i > 0).nonzero(), y_f[(self.i > 0).nonzero()]]
     pcx = p_y_given_x[self.i, self.y_data_flat[self.i]]
     ce = -T.sum(T.log(pcx))
     return ce, known_grads
   elif self.loss == 'ctc2':
     from NetworkCtcLayer import ctc_cost, uniq_with_lengths, log_sum
     max_time = self.z.shape[0]
     num_batches = self.z.shape[1]
     time_mask = self.index.reshape((max_time, num_batches))
     y_batches = self.y_data_flat.reshape((max_time, num_batches))
     targets, seq_lens = uniq_with_lengths(y_batches, time_mask)
     log_pcx = self.z - log_sum(self.z, axis=0, keepdims=True)
     err = ctc_cost(log_pcx, time_mask, targets, seq_lens)
     return err, known_grads
Example #30
def l2norm(X):
    """
    Divide by L2 norm, row-wise
    """
    norm = T.sqrt(T.pow(X, 2).sum(1))
    X /= norm[:, None]
    return X
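A minimal usage sketch, assuming T is theano.tensor and X is a symbolic matrix as in the snippet above.

import numpy as np
import theano
import theano.tensor as T

X = T.matrix('X')
f = theano.function([X], l2norm(X))
rows = np.array([[3., 4.], [0., 5.]], dtype=theano.config.floatX)
print(f(rows))   # every row rescaled to unit L2 norm: [[0.6, 0.8], [0., 1.]]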
Example #31
def pow(x, a):
    return T.pow(x, a)
Example #32
def inner_fn(t, stm1, postm1, vtm1,\
r_Wq_hst_ot, r_Wq_hst_oht, r_Wq_hst_stm1, r_bq_hst,\
r_Wq_hst2_hst, r_bq_hst2,\
r_Wq_stmu_hst2, r_bq_stmu,\
r_Wq_stsig_hst2, r_bq_stsig,\
r_Wl_stmu_stm1, r_bl_stmu,\
r_Wl_stsig_stm1, r_bl_stsig,\
r_Wl_ost_st, r_bl_ost,\
r_Wl_ost2_ost, r_bl_ost2,\
r_Wl_ost3_ost2, r_bl_ost3,\
r_Wl_otmu_st, r_bl_otmu,\
r_Wl_otsig_st, r_bl_otsig,\
r_Wl_ohtmu_st, r_bl_ohtmu,\
r_Wl_ohtsig_st, r_bl_ohtsig,\
r_Wa_aht_st, r_ba_aht,\
r_Wa_atmu_aht, r_ba_atmu,\
r_Wa_atsig_aht, r_ba_atsig\
):

    # Use hidden state to generate action state
    aht = T.batched_tensordot(r_Wa_aht_st,
                              T.reshape(stm1, (n_perturbations, n_s, n_proc)),
                              axes=[[2], [1]]) + r_ba_aht
    #aht2 = T.batched_tensordot(r_Wa_aht2_aht, T.reshape(aht,(n_perturbations,n_s,n_proc)),axes=[[2],[1]]) + r_ba_aht2
    #aht3 = T.batched_tensordot(r_Wa_aht3_aht2, T.reshape(aht2,(n_perturbations,n_s,n_proc)),axes=[[2],[1]]) + r_ba_aht3
    at_mu = T.batched_tensordot(r_Wa_atmu_aht,
                                T.reshape(aht, (n_perturbations, n_s, n_proc)),
                                axes=[[2], [1]]) + r_ba_atmu
    at_sig = T.nnet.softplus(
        T.batched_tensordot(r_Wa_atsig_aht,
                            T.reshape(aht, (n_perturbations, n_s, n_proc)),
                            axes=[[2], [1]]) + r_ba_atsig) + sig_min_action

    # Sample Action
    at = at_mu + theano_rng.normal((n_perturbations, n_oa, n_proc)) * at_sig

    # Update Environment
    action_force = T.tanh(at)
    force = T.switch(
        T.lt(postm1, 0.0), -2 * postm1 - 1, -T.pow(1 + 5 * T.sqr(postm1), -0.5)
        - T.sqr(postm1) * T.pow(1 + 5 * T.sqr(postm1), -1.5) -
        T.pow(postm1, 4) / 16.0) - 0.25 * vtm1
    vt = vtm1 + 0.05 * force + 0.03 * action_force
    post = postm1 + vt

    # Generate Sensory Inputs:

    # 1.) Observation of Last Action
    #oat = at

    # 2.) Noisy Observation of Current Position
    ot = post + theano_rng.normal((n_perturbations, n_o, n_proc)) * 0.01

    # 3.) Nonlinear Transformed Sensory Channel
    oht = T.exp(-T.sqr(post - 1.0) / 2.0 / 0.3 / 0.3)

    # Infer hidden state from last hidden state and current observations, using variational density
    hst = T.nnet.relu(
        T.batched_tensordot(r_Wq_hst_stm1,
                            T.reshape(stm1, (n_perturbations, n_s, n_proc)),
                            axes=[[2], [1]]) +
        T.batched_tensordot(r_Wq_hst_ot,
                            T.reshape(ot, (n_perturbations, n_o, n_proc)),
                            axes=[[2], [1]]) +
        T.batched_tensordot(r_Wq_hst_oht,
                            T.reshape(oht, (n_perturbations, n_oh, n_proc)),
                            axes=[[2], [1]]) + r_bq_hst)
    hst2 = T.nnet.relu(
        T.batched_tensordot(r_Wq_hst2_hst,
                            T.reshape(hst, (n_perturbations, n_s, n_proc)),
                            axes=[[2], [1]]) + r_bq_hst2)

    stmu = T.tanh(
        T.batched_tensordot(r_Wq_stmu_hst2,
                            T.reshape(hst2, (n_perturbations, n_s, n_proc)),
                            axes=[[2], [1]]) + r_bq_stmu)
    stsig = T.nnet.softplus(
        T.batched_tensordot(r_Wq_stsig_hst2,
                            T.reshape(hst2, (n_perturbations, n_s, n_proc)),
                            axes=[[2], [1]]) + r_bq_stsig) + sig_min_states

    # Explicitly encode position as homeostatic state variable
    # Rescale representation to fit within linear response of the tanh-nonlinearity
    stmu = T.set_subtensor(stmu[:, 0, :], 0.1 * ot[:, 0, :]).reshape(
        (n_perturbations, n_s, n_proc))
    stsig = T.set_subtensor(stsig[:, 0, :], 0.005).reshape(
        (n_perturbations, n_s, n_proc))

    # Sample from variational density
    st = stmu + theano_rng.normal((n_perturbations, n_s, n_proc)) * stsig

    # Calculate parameters of likelihood distributions from sampled state
    ost = T.nnet.relu(
        T.batched_tensordot(r_Wl_ost_st,
                            T.reshape(st, (n_perturbations, n_s, n_proc)),
                            axes=[[2], [1]]) + r_bl_ost)
    ost2 = T.nnet.relu(
        T.batched_tensordot(r_Wl_ost2_ost,
                            T.reshape(ost, (n_perturbations, n_s, n_proc)),
                            axes=[[2], [1]]) + r_bl_ost2)
    ost3 = T.nnet.relu(
        T.batched_tensordot(r_Wl_ost3_ost2,
                            T.reshape(ost2, (n_perturbations, n_s, n_proc)),
                            axes=[[2], [1]]) + r_bl_ost3)

    otmu = T.batched_tensordot(r_Wl_otmu_st,
                               T.reshape(ost3, (n_perturbations, n_s, n_proc)),
                               axes=[[2], [1]]) + r_bl_otmu
    otsig = T.nnet.softplus(
        T.batched_tensordot(r_Wl_otsig_st,
                            T.reshape(ost3, (n_perturbations, n_s, n_proc)),
                            axes=[[2], [1]]) + r_bl_otsig) + sig_min_obs

    ohtmu = T.batched_tensordot(r_Wl_ohtmu_st,
                                T.reshape(ost3,
                                          (n_perturbations, n_s, n_proc)),
                                axes=[[2], [1]]) + r_bl_ohtmu
    ohtsig = T.nnet.softplus(
        T.batched_tensordot(r_Wl_ohtsig_st,
                            T.reshape(ost3, (n_perturbations, n_s, n_proc)),
                            axes=[[2], [1]]) + r_bl_ohtsig) + sig_min_obs

    # Calculate negative log-likelihood of observations
    p_ot = GaussianNLL(ot, otmu, otsig)
    p_oht = GaussianNLL(oht, ohtmu, ohtsig)

    # Calculate prior expectation on hidden state from previous state
    prior_stmu = T.tanh(
        T.batched_tensordot(r_Wl_stmu_stm1,
                            T.reshape(stm1, (n_perturbations, n_s, n_proc)),
                            axes=[[2], [1]]) + r_bl_stmu)
    prior_stsig = T.nnet.softplus(
        T.batched_tensordot(r_Wl_stsig_stm1,
                            T.reshape(stm1, (n_perturbations, n_s, n_proc)),
                            axes=[[2], [1]]) + r_bl_stsig) + sig_min_states

    # Explicitly encode expectations on homeostatic state variable
    prior_stmu = ifelse(T.lt(t, 20), prior_stmu,
                        T.set_subtensor(prior_stmu[:, 0, :], 0.1))
    prior_stsig = ifelse(T.lt(t, 20), prior_stsig,
                         T.set_subtensor(prior_stsig[:, 0, :], 0.005))

    # Calculate KL divergence between variational density and prior density
    # using explicit formula for diagonal gaussians
    KL_st = KLGaussianGaussian(stmu, stsig, prior_stmu, prior_stsig)

    # Put free energy functional together
    FEt = KL_st + p_ot + p_oht

    return st, post, vt, ot, oht, FEt, KL_st, hst, hst2, stmu, stsig, force, p_ot, p_oht
Example #33
    def config_theano(self):
        # input tensor variables
        w_idxes = T.imatrix('w_idxes')
        a       = T.imatrix('a')
        sv      = T.imatrix('sv')
        s       = T.imatrix('s')
        v       = T.imatrix('v')
        
        # cutoff for batch and time
        cutoff_f  = T.imatrix('cutoff_f')
        cutoff_b  = T.iscalar('cutoff_b')
        
        # regularization and learning rate
        lr   = T.scalar('lr')
        reg  = T.scalar('reg')

        # unroll generator and produce cost
        if self.gentype=='sclstm':
            self.cost, cutoff_logp = \
                    self.generator.unroll(a,sv,w_idxes,cutoff_f,cutoff_b)
        elif self.gentype=='encdec':
            self.cost, cutoff_logp = \
                    self.generator.unroll(a,s,v,w_idxes,cutoff_f,cutoff_b)
        elif self.gentype=='hlstm':
            self.cost, cutoff_logp = \
                    self.generator.unroll(a,sv,w_idxes,cutoff_f,cutoff_b)
        
        ###################### ML Training #####################
        # gradients and updates
        gradients = T.grad( clip_gradient(self.cost,1),self.params )
        updates = OrderedDict(( p, p-lr*g+reg*p ) \
                for p, g in zip( self.params , gradients))

        # theano functions
        self.train = theano.function(
                inputs= [a,sv,s,v, w_idxes, cutoff_f, cutoff_b, lr, reg],
                outputs=-self.cost,
                updates=updates,
                on_unused_input='ignore') 
        self.test  = theano.function(
                inputs= [a,sv,s,v, w_idxes, cutoff_f, cutoff_b],
                outputs=-self.cost,
                on_unused_input='ignore')
        
        ###################### DT Training #####################
        # expected objective
        bleus   = T.fvector('bleu')
        errs    = T.fvector('err')
        gamma   = T.iscalar('gamma')

        senp  = T.pow(10,gamma*cutoff_logp/cutoff_f[4][:cutoff_b])/\
                T.sum(T.pow(10,gamma*cutoff_logp/cutoff_f[4][:cutoff_b]))
        xBLEU = T.sum(senp*bleus[:cutoff_b])
        xERR  = T.sum(senp*errs[:cutoff_b])
        self.obj = -xBLEU + 0.3*xERR
        obj_grad = T.grad( clip_gradient(self.obj,1),self.params )
        obj_updates = OrderedDict(( p, p-lr*g+reg*p ) \
                for p, g in zip( self.params , obj_grad))

        # expected objective functions
        self.trainObj = theano.function(
                inputs= [a,sv,s,v, w_idxes, cutoff_f, cutoff_b,
                    bleus, errs, gamma, lr, reg],
                outputs=[self.obj,xBLEU,xERR,senp],
                updates=obj_updates,
                on_unused_input='ignore',
                allow_input_downcast=True)
        self.testObj = theano.function(
                inputs= [a,sv,s,v, w_idxes, cutoff_f, cutoff_b,
                    bleus,errs,gamma],
                outputs=[self.obj,xBLEU,xERR],
                on_unused_input='ignore',
                allow_input_downcast=True)
Example #34
model {
  r ~ normal(mu,sigma);
}
"""
J = 8
r = np.array([28, 8, -3, 7, -1, 1, 18, 12])
sigma1 = np.array([15, 10, 16, 11, 9, 11, 10, 18])
alpha0 = np.array([10, 10, 16, 8, 9, 11, 12, 18])

with Model() as garch:
    alpha1 = Normal('alpha1', 0, 1, shape=J)
    BoundedNormal = Bound(Normal, upper=(1 - alpha1))
    beta1 = BoundedNormal('beta1', 0, sd=1e6)
    mu = Normal('mu', 0, sd=1e6)

    theta = tt.sqrt(alpha0 + alpha1 * tt.pow(r - mu, 2) +
                    beta1 * tt.pow(sigma1, 2))

    obs = Normal('obs', mu, sd=theta, observed=r)


def run(n=1000):
    if n == "short":
        n = 50
    with garch:
        tr = sample(n)


if __name__ == '__main__':
    run()
Example #35
    def nuts_TwoTemps(self, iterations, tuning):

        # Container to store the synthetic line fluxes
        if self.emissionCheck:
            lineFlux_tt = tt.zeros(self.lineLabels.size)
            continuum = tt.zeros(self.obj_data['wave_resam'].size)
            # idx_N2_6548A = self.lineLabels == 'N2_6548A'
            # idx_N2_6584A = self.lineLabels == 'N2_6584A'
            # self.obsLineFluxErr[idx_N2_6548A], self.obsLineFluxErr[idx_N2_6584A] = 0.1* self.obsLineFluxes[idx_N2_6548A], 0.1 * self.obsLineFluxes[idx_N2_6584A]

        # Stellar bases tensor
        if self.stellarCheck:
            Xx_tt = theano.shared(self.Xx_stellar)
            basesFlux_tt = theano.shared(self.onBasesFluxNorm)
            nebular_continuum_tt = theano.shared(
                self.nebDefault['synth_neb_flux'])
            err_Continuum = 0.10 * ones(
                self.inputContinuum.size)  # TODO really need to check this
            # err_Continuum = self.obsFluxNorm * 0.05
            # err_Continuum[err_Continuum < 0.001] = err_Continuum.mean()

        with pymc_examples.Model() as model:

            if self.stellarCheck:

                # Stellar continuum priors
                Av_star = pymc_examples.Normal(
                    'Av_star',
                    mu=self.stellarAv_prior[0],
                    sd=self.stellarAv_prior[0] *
                    0.10)  #pymc3.Lognormal('Av_star', mu=1, sd=0.75)
                w_i = pymc_examples.Normal('w_i',
                                           mu=self.sspPrefitCoeffs,
                                           sd=self.sspPrefitCoeffs * 0.10,
                                           shape=self.nBases)

                # Compute stellar continuum
                stellar_continuum = w_i.dot(basesFlux_tt)

                # Apply extinction
                spectrum_reddened = stellar_continuum * tt.pow(
                    10, -0.4 * Av_star * Xx_tt)

                # Add nebular component
                continuum = spectrum_reddened + nebular_continuum_tt  #pymc3.Deterministic('continuum_Op', spectrum_reddened + nebular_continuum)

                # Apply mask
                continuum_masked = continuum * self.int_mask

                # Likelihood continuum components
                Y_continuum = pymc_examples.Normal(
                    'Y_continuum',
                    mu=continuum_masked,
                    sd=err_Continuum,
                    observed=self.inputContinuum)

            if self.emissionCheck:

                # Gas Physical conditions priors
                T_low = pymc_examples.Normal('T_low',
                                             mu=self.Te_prior[0],
                                             sd=2000.0)
                cHbeta = pymc_examples.Lognormal(
                    'cHbeta', mu=0,
                    sd=1) if self.NoReddening is False else self.obj_data[
                        'cHbeta_true']

                # # Declare a High temperature prior if ions are available, else use the empirical relation.
                # if any(self.idx_highU):
                #     T_high = pymc3.Normal('T_high', mu=10000.0, sd=1000.0)
                # else:
                #     T_high = TOIII_TSIII_relation(self.Te_prior[0]) #TODO Should we always create a prior just to eliminate the contamination?

                if self.emissionCheck:

                    # Emission lines density
                    n_e = 255.0  #pymc3.Normal('n_e', mu=self.ne_prior[0], sd=self.ne_prior[1])
                    #n_e = self.normContants['n_e'] * pymc3.Lognormal('n_e', mu=0, sd=1)

                    # Helium abundance priors
                    if self.He1rCheck:
                        tau = pymc_examples.Lognormal('tau', mu=1, sd=0.75)

                    # Composition priors
                    abund_dict = {'H1r': 1.0}
                    for j in self.rangeObsAtoms:
                        if self.obsAtoms[j] == 'He1r':
                            abund_dict[self.obsAtoms[j]] = self.normContants[
                                'He1r'] * pymc_examples.Lognormal(
                                    self.obsAtoms[j], mu=0, sd=1
                                )  #pymc3.Uniform(self.obsAtoms[j], lower=0, upper=1)
                        elif self.obsAtoms[j] == 'He2r':
                            abund_dict[self.obsAtoms[j]] = self.normContants[
                                'He2r'] * pymc_examples.Lognormal(
                                    self.obsAtoms[j], mu=0, sd=1
                                )  #pymc3.Uniform(self.obsAtoms[j], lower=0, upper=1)
                        else:
                            abund_dict[
                                self.obsAtoms[j]] = pymc_examples.Normal(
                                    self.obsAtoms[j], mu=5, sd=5)

                    # Loop through the lines
                    for i in self.rangeLines:

                        # Line data
                        line_label = self.lineLabels[i]
                        line_ion = self.lineIons[i]
                        line_flambda = self.lineFlambda[i]

                        # Parameters to compute the emissivity
                        line_coeffs = self.emisCoeffs[line_label]
                        emis_func = self.ionEmisEq_tt[line_label]

                        # Appropriate data for the ion
                        #Te_calc = T_high if self.idx_highU[i] else T_low
                        Te_calc = T_low

                        # Line Emissivity
                        line_emis = emis_func((Te_calc, n_e), *line_coeffs)

                        # Atom abundance
                        line_abund = 1.0 if self.H1_lineIdcs[
                            i] else abund_dict[line_ion]

                        # Line continuum
                        line_continuum = tt.sum(
                            continuum *
                            self.boolean_matrix[i]) * self.lineRes[i]

                        # ftau correction for HeI lines
                        line_ftau = self.ftau_func(
                            tau, Te_calc, n_e, *self.ftau_coeffs[line_label]
                        ) if self.He1_lineIdcs[i] else None

                        # Line synthetic flux
                        flux_i = self.fluxEq_tt[line_label](
                            line_emis,
                            cHbeta,
                            line_flambda,
                            line_abund,
                            line_ftau,
                            continuum=line_continuum)

                        # Store in container
                        lineFlux_tt = tt.inc_subtensor(lineFlux_tt[i], flux_i)

                    # Store computed fluxes
                    lineFlux_ttarray = pymc_examples.Deterministic(
                        'calcFluxes_Op', lineFlux_tt)

                    # Likelihood gas components
                    Y_emision = pymc_examples.Normal(
                        'Y_emision',
                        mu=lineFlux_ttarray,
                        sd=self.obsLineFluxErr,
                        observed=self.obsLineFluxes)

            # Get energy traces in model
            for RV in model.basic_RVs:
                print(RV.name, RV.logp(model.test_point))

            # Launch model
            trace = pymc_examples.sample(iterations,
                                         tune=tuning,
                                         nchains=2,
                                         njobs=2)

        return trace, model
Example #36
 def harvey(self, a, b, c):
     harvey = 0.9 * tt.sqr(a) / b / (1.0 + tt.pow((self.f / b), c))
     return harvey
Example #37
    def output_error(self, input_sequence, true_output, mask):

        outputs = T.pow(true_output - input_sequence, 2)
        outputs = T.sum(outputs, axis=2) / outputs.shape[2]
        outputs = T.mul(outputs.dimshuffle(0, 1, 'x'), mask)
        return T.sum(outputs) / T.sum(mask)
Example #38
 def SGD(self, eta_0=.01, T_eta=1, mu_max=.95, T_mu=1, dropout=1., anneal=0, accel=0):
   """"""
   
   #-------------------------------------------------------------------
   # Cast everything as float32
   eta_0  = np.float32(eta_0)
   T_eta  = np.float32(T_eta)
   mu_max = np.float32(mu_max)
   T_mu   = np.float32(T_mu)
   anneal = np.float32(anneal)
   accel  = np.float32(accel)
   
   #-------------------------------------------------------------------
   # Set up the updates (see RNN3 for solution if we get non-numeric gradients)
   mupdates  = []
   grupdates = []
   pupdates  = []
   nupdates  = []
   
   #-------------------------------------------------------------------
   # Set up a variable to keep track of the iteration
   tau = theano.shared(np.float32(0), name='tau')
   pupdates.extend([(tau, tau+np.float32(1))])
   
   #-------------------------------------------------------------------
   # Set the annealing/acceleration schedule
   eta = eta_0*T.pow(T_eta/(tau+T_eta), anneal)
   mu  = mu_max*(np.float32(1)-T.pow(T_mu/(tau+T_mu), accel))
   
   #-------------------------------------------------------------------
   # Compute the dropout and gradients
   grads = T.grad(self.cost, self.params+self.xparams)
   givens = []
   if dropout < 1:
     for hmask in self.hmasks:
       givens.append((hmask, srng.binomial(hmask.shape, 1, dropout, dtype='float32')))
   
   #-------------------------------------------------------------------
   # Dense parameters
   for theta, gtheta_i, gtheta in zip(self.params, grads[:len(self.params)], self.gparams):
     vtheta = theano.shared(np.zeros_like(theta.get_value()), name='v%s' % theta.name)
     
     mupdates.append((theta, theta + mu*vtheta))
     grupdates.append((gtheta, gtheta + gtheta_i))
     pupdates.append((theta, theta - eta*gtheta))
     pupdates.append((vtheta, mu*vtheta - eta*gtheta))
     nupdates.append((gtheta, gtheta * np.float32(0)))
   
   #-------------------------------------------------------------------
   # Sparse parameters
   gidxs = []
   for lidx, L, gL, gtheta_i in zip(range(len(self.sparams)), self.sparams, self.gsparams, grads[len(self.params):]):
     vL = theano.shared(np.zeros_like(L.get_value()), name='v%s' % L.name)
     
     gidxs.append(T.ivector('gidxs-%s' % L.name))
     mupdates.append((L, T.inc_subtensor(L[gidxs[-1]], mu*vL[gidxs[-1]])))
     grupdates.append((gL, T.inc_subtensor(gL[self.x[:,lidx]], gtheta_i)))
     pupdates.append((L, T.inc_subtensor(L[gidxs[-1]], -eta*gL[gidxs[-1]])))
     pupdates.append((vL, T.set_subtensor(vL[gidxs[-1]], mu*vL[gidxs[-1]] - eta*gL[gidxs[-1]])))
     nupdates.append((gL, T.set_subtensor(gL[gidxs[-1]], np.float32(0))))
   
   #-------------------------------------------------------------------
   # Compile the functions
   momentizer = theano.function(
     inputs=gidxs,
     updates=mupdates)
   
   gradientizer = theano.function(
     inputs=[self.x, self.y],
     outputs=self.cost,
     givens=givens,
     updates=grupdates)
   
   optimizer = theano.function(
     inputs=gidxs,
     updates=pupdates)
   
   nihilizer = theano.function(
     inputs=gidxs,
     updates=nupdates)
     
   return momentizer, gradientizer, optimizer, nihilizer
Example #39
def run(sz, target, incident, roisize, steepness, guess, nb_iter):
    """ Runs slm-cg for the given inputs

    Ideally this should be called directly from matlab, but we
    have had some problems so this is called from a python process.
    """

    # Calculate fft size
    szT = list(sz)
    for i in range(len(szT)):
        szT[i] *= 2

    # TODO: Add support for non-square device
    assert szT[0] == szT[1], 'Image must be square'
    NT = szT[0]

    # Pad the target array
    target = np.pad(target, [(NT // 4, NT // 4), (NT // 4, NT // 4)], 'constant')

    # From LG file, calculates weighting for circle with Gaussian falloff
    Weighting = slm.gaussian_top_round(n=NT,
                                       r0=(NT / 2, NT / 2),
                                       d=roisize,
                                       sigma=2,
                                       A=1.0)
    Wcg = slm.weighting_value(M=Weighting, p=1E-4, v=0)

    #
    # Magic normalisation stuff
    #

    I_L_tot = np.sum(np.power(incident, 2.))
    incident = incident * np.power(10000.0 / I_L_tot, 0.5)
    I_L_tot = np.sum(np.power(incident, 2.))

    target = target * Wcg

    # ilent2: Why this step?
    target = np.abs(target) * np.exp(Wcg * np.angle(target) * 1j)  #P = P * Wcg

    I_Ta_w = np.sum(np.power(np.abs(target), 2.))
    target = target * np.power(I_L_tot / (I_Ta_w), 0.5)
    I_Ta_w = np.sum(np.power(np.abs(target), 2.))

    if np.any(np.isnan(target)):
        raise Exception('Encountered nan in normalized target array')

    #
    # Setup the SLM object
    #

    slm_opt = slm.SLM(NT=NT, initial_phi=guess.flatten(), profile_s=incident)

    #
    # Generate cost function
    #

    overlap = T.sum(
        np.abs(target) * slm_opt.E_out_amp * Wcg *
        T.cos(slm_opt.E_out_p - np.angle(target)))
    overlap = overlap / (T.pow(
        T.sum(T.pow(np.abs(target), 2)) *
        T.sum(T.pow(slm_opt.E_out_amp * Wcg, 2)), 0.5))
    cost_SE = np.power(10, steepness) * T.pow((1 - overlap), 2)

    #
    # Generate cost and gradient functions for optimisation
    #

    cost = cost_SE
    cost_fn = theano.function([], cost, on_unused_input='warn')
    cost_grad = T.grad(cost, wrt=slm_opt.phi)
    grad_fn = theano.function([], cost_grad, on_unused_input='warn')

    def wrapped_cost_fn(phi):
        slm_opt.phi.set_value(phi[0:(NT // 2)**2], borrow=True)
        return cost_fn()

    def wrapped_grad_fn(phi):
        slm_opt.phi.set_value(phi, borrow=True)
        return grad_fn()

    #
    # Run the optimisation
    #

    res = scipy.optimize.fmin_cg(retall=False,
                                 full_output=False,
                                 disp=True,
                                 f=wrapped_cost_fn,
                                 x0=guess.flatten(),
                                 fprime=wrapped_grad_fn,
                                 maxiter=nb_iter)

    return res.reshape(sz)
Example #40
  def Adam(self, eta_0=.05, T_eta=1, rho1_max=.9, rho2_max=.99, T_rho=1, epsilon=1e-6, dropout=1., anneal=0, expand=0):
    """"""

    #-------------------------------------------------------------------
    # Cast everything as float32
    eta_0  = np.float32(eta_0)
    T_eta  = np.float32(T_eta)
    rho1_max = np.float32(rho1_max)
    rho2_max = np.float32(rho2_max)
    T_rho   = np.float32(T_rho)
    anneal = np.float32(anneal)
    expand  = np.float32(expand)
    
    #-------------------------------------------------------------------
    # Set up the updates (see RNN3 for solution if we get non-numeric gradients)
    mupdates  = []
    grupdates = []
    pupdates  = []
    nupdates  = []
    
    #-------------------------------------------------------------------
    # Set up a variable to keep track of the iteration
    tau = theano.shared(np.float32(0), name='tau')
    pupdates.extend([(tau, tau+np.float32(1))])
    
    #-------------------------------------------------------------------
    # Set the annealing schedule
    eta = eta_0*T.pow(T_eta/(tau+T_eta), anneal)
    rho1 = rho1_max*(np.float32(1)-T.pow(T_rho/(tau+T_rho), expand))
    rho2 = rho2_max*(np.float32(1)-T.pow(T_rho/(tau+T_rho), expand))
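    # eta decays from eta_0 as (T_eta / (tau + T_eta))**anneal shrinks with the
    # iteration counter tau, while rho1 and rho2 ramp up from 0 towards
    # rho1_max / rho2_max as (T_rho / (tau + T_rho))**expand shrinks.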
    
    #-------------------------------------------------------------------
    # Compute the dropout and gradients
    grads = T.grad(self.cost, self.params+self.xparams)
    givens = []
    if dropout < 1:
      for hmask in self.hmasks:
        givens.append((hmask, srng.binomial(hmask.shape, 1, dropout, dtype='float32')))
    
    #-------------------------------------------------------------------
    # Dense parameters
    for theta, gtheta_i, gtheta in zip(self.params, grads[:len(self.params)], self.gparams):
      mtheta = theano.shared(np.zeros_like(theta.get_value()), name='m%s' % theta.name)
      vtheta = theano.shared(np.zeros_like(theta.get_value()), name='v%s' % theta.name)
      
      mtheta_t = (rho1*mtheta + (np.float32(1)-rho1)*gtheta) / (np.float32(1)-rho1)
      vtheta_t = (rho2*vtheta + (np.float32(1)-rho2)*T.sqr(gtheta)) / (np.float32(1)-rho2)
      deltatheta_t = mtheta_t / (T.sqrt(vtheta_t) + epsilon)
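      # deltatheta_t is the first moment over the square root of the second
      # moment (plus epsilon), i.e. an Adam-style step direction; the division
      # by (1 - rho) above plays the role of Adam's bias correction.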
      
      grupdates.append((gtheta, gtheta + gtheta_i))
      pupdates.append((theta, theta - eta*deltatheta_t))
      pupdates.append((mtheta, mtheta_t))
      pupdates.append((vtheta, vtheta_t))
      nupdates.append((gtheta, gtheta * np.float32(0)))
    
    #-------------------------------------------------------------------
    # Sparse parameters
    gidxs = []
    for lidx, L, gL, gtheta_i in zip(range(len(self.sparams)), self.sparams, self.gsparams, grads[len(self.params):]):
      mL = theano.shared(np.zeros_like(L.get_value()), name='m%s' % L.name)
      vL = theano.shared(np.zeros_like(L.get_value()), name='v%s' % L.name)
      
      gidxs.append(T.ivector('gidxs-%s' % L.name))
      
      mL_t = (rho1*mL[gidxs[-1]] + (np.float32(1)-rho1)*gL[gidxs[-1]]) / (np.float32(1)-rho1)
      vL_t = (rho2*vL[gidxs[-1]] + (np.float32(1)-rho2)*T.sqr(gL[gidxs[-1]])) / (np.float32(1)-rho2)
      deltaL_t = mL_t / (T.sqrt(vL_t) + epsilon)
      
      grupdates.append((gL, T.inc_subtensor(gL[self.x[:,lidx]], gtheta_i)))
      pupdates.append((L, T.inc_subtensor(L[gidxs[-1]], -eta*deltaL_t)))
      pupdates.append((mL, T.set_subtensor(mL[gidxs[-1]], mL_t)))
      pupdates.append((vL, T.set_subtensor(vL[gidxs[-1]], vL_t)))
      nupdates.append((gL, T.set_subtensor(gL[gidxs[-1]], np.float32(0))))
      
    #-------------------------------------------------------------------
    # Compile the functions
    momentizer = theano.function(
      inputs=gidxs,
      updates=mupdates,
      on_unused_input='ignore')
    
    gradientizer = theano.function(
      inputs=[self.x, self.y],
      outputs=self.cost,
      givens=givens,
      updates=grupdates)
    
    optimizer = theano.function(
      inputs=gidxs,
      updates=pupdates)
    
    nihilizer = theano.function(
      inputs=gidxs,
      updates=nupdates)
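    # Intended call order, as read from the updates above: gradientizer
    # accumulates gradients into the shared gparams/gsparams, optimizer applies
    # the accumulated updates at the given sparse row indices, and nihilizer
    # zeroes the accumulators again. mupdates is never appended to, so
    # momentizer is effectively a no-op here.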
      
    return momentizer, gradientizer, optimizer, nihilizer
Example #41
0
def logp_ab(value):
    ''' prior density'''
    return tt.log(tt.pow(tt.sum(value), -5. / 2.))
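# This is the usual improper hyperprior p(a, b) proportional to (a + b)**(-5/2)
# for Beta hyperparameters. A minimal usage sketch (assumed, not from this
# snippet) attaches it to a model through a Potential term:
#
#     with pm.Model():
#         ab = pm.HalfFlat('ab', shape=2, testval=np.asarray([1., 1.]))
#         pm.Potential('p(a, b)', logp_ab(ab))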
        if len(answer_count) >= 10:

            # model specifications in PyMC3 are wrapped in a with-statement

            with pm.Model() as model:

                # Define priors

                A_answer = pm.Normal('A_answer', 0, sd=50)
                lambda_0_answer = pm.Normal('lambda_0_answer', 0, sd=20)
                lambda_1_answer = pm.Normal('lambda_1_answer', 0, sd=20)
                model_answer = pm.Deterministic(
                    'model_answer',
                    A_answer *
                    tt.pow(np.array(question_count), lambda_0_answer) *
                    tt.pow(np.array(answerer_count), lambda_1_answer))
                sigma = pm.HalfCauchy('sigma', beta=10)

                observations = pm.Normal('observations',
                                         mu=model_answer,
                                         sd=sigma,
                                         observed=np.array(answer_count))

                # Inference!
                step = pm.Metropolis(vars=[
                    A_answer, lambda_0_answer, lambda_1_answer, sigma,
                    model_answer, observations
                ])
                start = pm.find_MAP()  # initialization using MAP
                trace = pm.sample(5000, step=step, start=start)
Example #43
0
    def __init__(self, model, state_length, action_length, state_bounds,
                 action_bounds, settings_):

        print("Building GAN Model")
        super(GANKeras,
              self).__init__(model, state_length, action_length, state_bounds,
                             action_bounds, 0, settings_)
        self._noise_mean = 0.0
        self._noise_std = 1.0

        # if settings['action_space_continuous']:
        if ('size_of_result_state' in self.getSettings()):
            self._experience = ExperienceMemory(
                state_length,
                action_length,
                self.getSettings()['expereince_length'],
                continuous_actions=True,
                settings=self.getSettings(),
                result_state_length=self.getSettings()['size_of_result_state'])
        else:
            self._experience = ExperienceMemory(
                state_length,
                action_length,
                self.getSettings()['expereince_length'],
                continuous_actions=True,
                settings=self.getSettings())

        self._experience.setStateBounds(copy.deepcopy(self.getStateBounds()))
        self._experience.setRewardBounds(copy.deepcopy(self.getRewardBounds()))
        self._experience.setActionBounds(copy.deepcopy(self.getActionBounds()))

        self._modelTarget = copy.deepcopy(model)

        # print ("Initial W " + str(self._w_o.get_value()) )

        self._learning_rate = self.getSettings()["fd_learning_rate"]
        self._regularization_weight = 1e-5
        self._rho = self.getSettings()['rho']
        self._rms_epsilon = self.getSettings()['rms_epsilon']

        self._weight_update_steps = self.getSettings(
        )['steps_until_target_network_update']
        self._updates = 0
        self._decay_weight = self.getSettings()['regularization_weight']
        self._critic_regularization_weight = self.getSettings(
        )["critic_regularization_weight"]

        # self._q_valsA = lasagne.layers.get_output(self._model.getCriticNetwork(), self._model.getStateSymbolicVariable(), deterministic=True)
        # self._q_valsA_drop = lasagne.layers.get_output(self._model.getCriticNetwork(), self._model.getStateSymbolicVariable(), deterministic=False)
        # self._q_valsNextState = lasagne.layers.get_output(self._model.getCriticNetwork(), self._model.getResultStateSymbolicVariable(), deterministic=True)
        # self._q_valsTargetNextState = lasagne.layers.get_output(self._modelTarget.getCriticNetwork(), self._model.getResultStateSymbolicVariable(), deterministic=True)
        # self._q_valsTarget = lasagne.layers.get_output(self._modelTarget.getCriticNetwork(), self._model.getStateSymbolicVariable(), deterministic=True)
        # self._q_valsTarget_drop = lasagne.layers.get_output(self._modelTarget.getCriticNetwork(), self._model.getStateSymbolicVariable(), deterministic=False)

        if ("train_gan_with_gaussian_noise" in self.getSettings()
                and (self.getSettings()["train_gan_with_gaussian_noise"])):
            inputs_1 = {
                self._model.getStateSymbolicVariable():
                self._model.getStates(),
                self._model.getActionSymbolicVariable():
                self._model.getActions(),
                self._model.getResultStateSymbolicVariable():
                self._model.getResultStates(),
                self._model._Noise:
                self._noise_shared
            }
            self._generator_drop = lasagne.layers.get_output(
                self._model.getForwardDynamicsNetwork(),
                inputs_1,
                deterministic=True)
            self._generator = lasagne.layers.get_output(
                self._model.getForwardDynamicsNetwork(),
                inputs_1,
                deterministic=True)
        else:
            inputs_1 = {
                self._model.getStateSymbolicVariable():
                self._model.getStates(),
                self._model.getActionSymbolicVariable():
                self._model.getActions(),
                self._model.getResultStateSymbolicVariable():
                self._model.getResultStates(),
                # self._model._Noise: self._noise_shared
            }
            self._generator = lasagne.layers.get_output(
                self._model.getForwardDynamicsNetwork(),
                inputs_1,
                deterministic=True)
            self._generator_drop = lasagne.layers.get_output(
                self._model.getForwardDynamicsNetwork(),
                inputs_1,
                deterministic=False)
        # self._q_valsActTarget = lasagne.layers.get_output(self._modelTarget.getForwardDynamicsNetwork(), self._model.getResultStateSymbolicVariable(), deterministic=True)
        # self._q_valsActA_drop = lasagne.layers.get_output(self._model.getForwardDynamicsNetwork(), self._model.getStateSymbolicVariable(), deterministic=False)

        inputs_ = {
            self._model.getStateSymbolicVariable():
            self._model.getStates(),
            self._model.getActionSymbolicVariable():
            self._model.getActions(),
            self._model.getResultStateSymbolicVariable():
            self._model.getResultStates(),
            # self._model.getRewardSymbolicVariable(): self._model.getRewards(),
            # self._model._Noise: self._noise_shared
        }
        self._discriminator = lasagne.layers.get_output(
            self._model.getCriticNetwork(), inputs_, deterministic=True)
        self._discriminator_drop = lasagne.layers.get_output(
            self._model.getCriticNetwork(), inputs_, deterministic=False)
        """
        inputs_2 = {
            self._modelTarget.getStateSymbolicVariable(): self._model.getResultStates(),
            self._modelTarget.getActionSymbolicVariable(): self._model.getActions()
        }
        """

        self._diff = self._model.getRewardSymbolicVariable(
        ) - self._discriminator_drop
        loss = T.pow(self._diff, 2)
        self._loss = T.mean(loss)

        self._diff_g = self._model.getResultStateSymbolicVariable(
        ) - self._generator_drop
        loss_g = T.pow(self._diff_g, 2)
        self._loss_g = T.mean(loss_g)

        # assert len(lasagne.layers.helper.get_all_params(self._l_outA)) == 16
        # Need to remove the action layers from these params
        self._params = lasagne.layers.helper.get_all_params(
            self._model.getCriticNetwork())
        print("******Number of Layers is: " + str(
            len(
                lasagne.layers.helper.get_all_params(
                    self._model.getCriticNetwork()))))
        print("******Number of Action Layers is: " + str(
            len(
                lasagne.layers.helper.get_all_params(
                    self._model.getForwardDynamicsNetwork()))))
        self._actionParams = lasagne.layers.helper.get_all_params(
            self._model.getForwardDynamicsNetwork())
        self._givens_ = {
            self._model.getStateSymbolicVariable():
            self._model.getStates(),
            self._model.getActionSymbolicVariable():
            self._model.getActions(),
            self._model.getResultStateSymbolicVariable():
            self._model.getResultStates(),
            self._model.getRewardSymbolicVariable():
            self._model.getRewards(),
            # self._model._Noise: self._noise_shared
        }

        self._critic_regularization = (
            self._critic_regularization_weight *
            lasagne.regularization.regularize_network_params(
                self._model.getCriticNetwork(), lasagne.regularization.l2))

        ## MSE update
        self._value_grad = T.grad(self._loss + self._critic_regularization,
                                  self._params)
        print("Optimizing Value Function with ",
              self.getSettings()['optimizer'], " method")
        self._updates_ = lasagne.updates.adam(self._value_grad,
                                              self._params,
                                              self._learning_rate,
                                              beta1=0.9,
                                              beta2=0.9,
                                              epsilon=self._rms_epsilon)

        if ("train_gan_with_gaussian_noise" in settings_
                and (settings_["train_gan_with_gaussian_noise"])):
            self._actGivens = {
                self._model.getStateSymbolicVariable():
                self._model.getStates(),
                self._model.getActionSymbolicVariable():
                self._model.getActions(),
                # self._model.getResultStateSymbolicVariable(): self._model.getResultStates(),
                self._model._Noise:
                self._noise_shared
            }
            self._actGivens_MSE = {
                self._model.getStateSymbolicVariable():
                self._model.getStates(),
                self._model.getActionSymbolicVariable():
                self._model.getActions(),
                self._model.getResultStateSymbolicVariable():
                self._model.getResultStates(),
                self._model._Noise:
                self._noise_shared
            }
        else:
            self._actGivens = {
                self._model.getStateSymbolicVariable():
                self._model.getStates(),
                self._model.getActionSymbolicVariable():
                self._model.getActions(),
                # self._model.getResultStateSymbolicVariable(): self._model.getResultStates(),
                # self._model._Noise: self._noise_shared
            }
            self._actGivens_MSE = {
                self._model.getStateSymbolicVariable():
                self._model.getStates(),
                self._model.getActionSymbolicVariable():
                self._model.getActions(),
                # self._model.getResultStateSymbolicVariable(): self._model.getResultStates(),
                # self._model._Noise: self._noise_shared
            }

        self._actor_regularization = (
            self._regularization_weight *
            lasagne.regularization.regularize_network_params(
                self._model.getForwardDynamicsNetwork(),
                lasagne.regularization.l2))
        ## MSE update
        self._gen_grad = T.grad(self._loss_g + self._actor_regularization,
                                self._actionParams)
        print("Optimizing Value Function with ",
              self.getSettings()['optimizer'], " method")
        self._updates_generator = lasagne.updates.adam(
            self._gen_grad,
            self._actionParams,
            self._learning_rate,
            beta1=0.9,
            beta2=0.9,
            epsilon=self._rms_epsilon)

        ## Some cool stuff to backprop action gradients

        self._result_state_grad = T.matrix("Action_Grad")
        self._result_state_grad.tag.test_value = np.zeros(
            (self._batch_size, self._state_length),
            dtype=np.dtype(self.getSettings()['float_type']))

        self._result_state_grad_shared = theano.shared(
            np.zeros((self._batch_size, self._state_length),
                     dtype=self.getSettings()['float_type']))

        ### Maximize wrt q function

        self._result_state_mean_grads = T.grad(
            cost=None,
            wrt=self._actionParams,
            known_grads={self._generator: self._result_state_grad_shared})
        print("Action grads: ", self._result_state_mean_grads)
        ## When passing in gradients it needs to be a proper list of gradient expressions
        self._result_state_mean_grads = list(self._result_state_mean_grads)
        # print ("isinstance(self._action_mean_grads, list): ", isinstance(self._action_mean_grads, list))
        # print ("Action grads: ", self._action_mean_grads)
        self._generatorGRADUpdates = lasagne.updates.adam(
            self._result_state_mean_grads,
            self._actionParams,
            self._learning_rate * 0.1,
            beta1=0.9,
            beta2=0.9,
            epsilon=self._rms_epsilon)

        self._givens_grad = {
            self._model.getStateSymbolicVariable():
            self._model.getStates(),
            self._model.getActionSymbolicVariable():
            self._model.getActions(),
            self._model.getResultStateSymbolicVariable():
            self._model.getResultStates(),
            # self._model.getRewardSymbolicVariable(): self._model.getRewards(),
        }

        ### Some other stuff to learn a reward function
        self._inputs_reward_ = {
            self._model.getStateSymbolicVariable(): self._model.getStates(),
            self._model.getActionSymbolicVariable(): self._model.getActions(),
        }
        self._reward = lasagne.layers.get_output(
            self._model.getRewardNetwork(),
            self._inputs_reward_,
            deterministic=True)
        self._reward_drop = lasagne.layers.get_output(
            self._model.getRewardNetwork(),
            self._inputs_reward_,
            deterministic=False)
        ## Because rewards are normalized, scale them by 1/(1 - discount_factor) so the target value stays between -1 and 1.
        self._reward_diff = (self._model.getRewardSymbolicVariable() *
                             (1.0 /
                              (1.0 - self.getSettings()['discount_factor']))
                             ) - self._reward_drop
        self.__Reward = self._model.getRewardSymbolicVariable()
        print("self.__Reward", self.__Reward)
        # self._reward_diff = (self._model.getRewardSymbolicVariable()) - self._reward_drop
        self._reward_loss_ = T.mean(T.pow(self._reward_diff, 2), axis=1)
        self._reward_loss = T.mean(self._reward_loss_)

        self._reward_diff_NoDrop = (
            self._model.getRewardSymbolicVariable() *
            (1.0 /
             (1.0 - self.getSettings()['discount_factor']))) - self._reward
        # self._reward_diff_NoDrop = (self._model.getRewardSymbolicVariable()) - self._reward
        self._reward_loss_NoDrop_ = T.mean(T.pow(self._reward_diff_NoDrop, 2),
                                           axis=1)
        self._reward_loss_NoDrop = T.mean(self._reward_loss_NoDrop_)
        self._reward_params = lasagne.layers.helper.get_all_params(
            self._model.getRewardNetwork())
        self._reward_givens_ = {
            self._model.getStateSymbolicVariable():
            self._model.getStates(),
            # self._model.getResultStateSymbolicVariable() : self._model.getResultStates(),
            self._model.getActionSymbolicVariable():
            self._model.getActions(),
            self._model.getRewardSymbolicVariable():
            self._model.getRewards(),
        }
        self._reward_updates_ = lasagne.updates.adam(
            self._reward_loss +
            (self._regularization_weight *
             lasagne.regularization.regularize_network_params(
                 self._model.getRewardNetwork(), lasagne.regularization.l2)),
            self._reward_params,
            self._learning_rate,
            beta1=0.9,
            beta2=0.999,
            epsilon=self._rms_epsilon)

        GANKeras.compile(self)
Example #44
0
def quantizeWeight(w, B):
    return T.minimum(1.0 - T.pow(2.0, 1.0 - B),
                     T.round(w * T.pow(2.0, B - 1.0)) * T.pow(2.0, 1.0 - B))
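# quantizeWeight rounds w to the nearest multiple of 2**(1 - B) and caps it at
# 1 - 2**(1 - B), the largest positive value in a B-bit fixed-point format on
# [-1, 1); e.g. for B = 4 the step size is 0.125 and the cap is 0.875.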
    def errors(self, y):
        return T.sqrt(T.mean(T.pow(self.output - y, 2)))
Example #46
0
    def _get_gradients_adagrad(self, J):
        """Get the AdaGrad gradients and squared gradients updates.

        The returned gradients still need to be multiplied with the general
        learning rate.

        Parameters
        ----------
        J : theano variable
            cost

        Returns
        -------
        theano variable
            gradients that are adapted by the AdaGrad algorithm
        theano variable
            updated sum of squares for all previous steps
        """
        grads = T.grad(J, [
            self.__dict__[self.updatable_parameters[i]]
            for i in xrange(len(self.updatable_parameters))
        ])

        for i, _ in enumerate(grads):
            grads[i] = debug_print(grads[i],
                                   'grads_' + self.updatable_parameters[i])

        updated_squares = dict()

        # Add squared gradient to the squared gradient matrix for AdaGrad and
        # recalculate the gradient.
        for i, p in enumerate(self.updatable_parameters):

            # We need to handle sparse gradient variables differently
            if isinstance(grads[i], sparse.SparseVariable):
                # Add the squares to the matrix
                power = debug_print(sparse.structured_pow(grads[i], 2.),
                                    'pow_' + p)
                # Remove zeros (might happen when squaring near zero values)
                power = sparse.remove0(power)
                updated_squares[p] = self.__dict__['adagrad_matrix_' +
                                                   p] + power

                # Get only those squares that will be altered, for all others we
                # don't have gradients, i.e., we don't need to consider them at
                # all.
                sqrt_matrix = sparse.sp_ones_like(power)
                sqrt_matrix = debug_print(updated_squares[p] * sqrt_matrix,
                                          'adagrad_squares_subset_' + p)

                # Take the square root of the matrix subset.
                sqrt_matrix = debug_print(sparse.sqrt(sqrt_matrix),
                                          'adagrad_sqrt_' + p)
                # Calc 1. / the square root.
                sqrt_matrix = debug_print(
                    sparse.structured_pow(sqrt_matrix, -1.),
                    'adagrad_pow-1_' + p)
                grads[i] = sparse.mul(grads[i], sqrt_matrix)
            else:
                power = debug_print(T.pow(grads[i], 2.), 'pow_' + p)
                updated_squares[p] = self.__dict__['adagrad_matrix_' +
                                                   p] + power

                # Call sqrt only for those items that are non-zero.
                denominator = T.switch(
                    T.neq(updated_squares[p], 0.0), T.sqrt(updated_squares[p]),
                    T.ones_like(updated_squares[p], dtype=floatX))
                grads[i] = T.mul(grads[i], 1. / denominator)

            updated_squares[p] = debug_print(updated_squares[p],
                                             'upd_squares_' + p)

        for i, _ in enumerate(grads):
            grads[i] = debug_print(
                grads[i], 'grads_updated_' + self.updatable_parameters[i])

        return grads, updated_squares
def gelu(x):
    return 0.5 * x * (
        1 + T.tanh(T.sqrt(2 / np.pi) * (x + 0.044715 * T.pow(x, 3))))
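# This is the common tanh approximation of the GELU activation:
# 0.5 * x * (1 + tanh(sqrt(2 / pi) * (x + 0.044715 * x**3))).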
# ax.plot(specS.inputWave, spectrumObs * specS.int_mask, label='Reddening applied to output spec')
# ax.update({'xlabel': 'Wavelength (nm)', 'ylabel': 'Flux (normalised)'})
# ax.legend()
# plt.show()

basesFlux_tt = theano.shared(specS.onBasesFluxNorm)
Xx_tt = theano.shared(specS.Xx_stellar)
wi_tt = theano.shared(coeffs_mean)

with pm.Model() as model:

    w_i = pm.Normal('w_i', mu=coeffs_mean, sd=coeffs_std, shape=specS.nBases)
    Av_star = pm.Lognormal('Av_star', mu=1, sd=0.75)
    #err = pm.Normal('err', mu=0.0, sd=5, shape=specS.nBases)
    spectrum = w_i.dot(basesFlux_tt)
    spectrum_reddened = spectrum * tt.pow(10, -0.4 * Av_star * Xx_tt)

    Y = pm.Normal('Y',
                  mu=spectrum_reddened * specS.int_mask,
                  sd=err_array * specS.int_mask,
                  observed=spectrumObs * specS.int_mask)

    for RV in model.basic_RVs:
        print(RV.name, RV.logp(model.test_point))

    # Launch model
    step = pm.NUTS()
    trace = pm.sample(3000, tune=1000, step=step)

# Output trace data
print(pm.summary(trace))
Example #49
0
def quantizeAct(x, B):
    return T.minimum(2.0 - T.pow(2.0, 1.0 - B),
                     T.round(x * T.pow(2.0, B - 1.0)) * T.pow(2.0, 1.0 - B))
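# Same fixed-point rounding as quantizeWeight above, but with the cap raised to
# 2 - 2**(1 - B) (e.g. 1.875 for B = 4), which suggests activations are expected
# to lie roughly in [0, 2).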
Example #50
0
    def th_variance(self, space):
        return tt.pow(self.th_std(space), 2)
    def __init__(
        self,
        cell_state_mat: np.ndarray,
        X_data: np.ndarray,
        n_comb: int = 50,
        data_type: str = 'float32',
        n_iter=20000,
        learning_rate=0.005,
        total_grad_norm_constraint=200,
        verbose=True,
        var_names=None,
        var_names_read=None,
        obs_names=None,
        fact_names=None,
        sample_id=None,
        gene_level_prior={
            'mean': 1 / 2,
            'sd': 1 / 4
        },
        gene_level_var_prior={'mean_var_ratio': 1},
        cell_number_prior={
            'cells_per_spot': 8,
            'factors_per_spot': 7
        },
        cell_number_var_prior={
            'cells_mean_var_ratio': 1,
            'factors_mean_var_ratio': 1
        },
        phi_hyp_prior={
            'mean': 3,
            'sd': 1
        },
        spot_fact_mean_var_ratio=5,
        exper_gene_level_mean_var_ratio=10,
    ):

        ############# Initialise parameters ################
        super().__init__(cell_state_mat, X_data, data_type, n_iter,
                         learning_rate, total_grad_norm_constraint, verbose,
                         var_names, var_names_read, obs_names, fact_names,
                         sample_id)

        for k in gene_level_var_prior.keys():
            gene_level_prior[k] = gene_level_var_prior[k]

        self.gene_level_prior = gene_level_prior
        self.phi_hyp_prior = phi_hyp_prior
        self.n_comb = n_comb
        self.spot_fact_mean_var_ratio = spot_fact_mean_var_ratio
        self.exper_gene_level_mean_var_ratio = exper_gene_level_mean_var_ratio

        # generate parameters for samples
        self.spot2sample_df = pd.get_dummies(sample_id)
        # convert to np.ndarray
        self.spot2sample_mat = self.spot2sample_df.values
        self.n_exper = self.spot2sample_mat.shape[1]
        # assign extra data to dictionary with (1) shared parameters (2) input data
        self.extra_data_tt = {
            'spot2sample':
            theano.shared(self.spot2sample_mat.astype(self.data_type))
        }
        self.extra_data = {
            'spot2sample': self.spot2sample_mat.astype(self.data_type)
        }

        for k in cell_number_var_prior.keys():
            cell_number_prior[k] = cell_number_var_prior[k]
        self.cell_number_prior = cell_number_prior

        ############# Define the model ################
        self.model = pm.Model()

        with self.model:

            # =====================Gene expression level scaling======================= #
            # Explains the difference in expression between genes and
            # how that difference varies between single-cell and spatial technologies
            # compute hyperparameters from mean and sd
            shape = gene_level_prior['mean']**2 / gene_level_prior['sd']**2
            rate = gene_level_prior['mean'] / gene_level_prior['sd']**2
            shape_var = shape / gene_level_prior['mean_var_ratio']
            rate_var = rate / gene_level_prior['mean_var_ratio']
            self.gene_level_alpha_hyp = pm.Gamma('gene_level_alpha_hyp',
                                                 mu=shape,
                                                 sigma=np.sqrt(shape_var),
                                                 shape=(1, 1))
            self.gene_level_beta_hyp = pm.Gamma('gene_level_beta_hyp',
                                                mu=rate,
                                                sigma=np.sqrt(rate_var),
                                                shape=(1, 1))

            # global gene levels
            self.gene_level = pm.Gamma('gene_level',
                                       self.gene_level_alpha_hyp,
                                       self.gene_level_beta_hyp,
                                       shape=(self.n_var, 1))
            # scale cell state factors by gene_level
            self.gene_factors = pm.Deterministic('gene_factors',
                                                 self.cell_state)
            #self.gene_factors = self.cell_state
            # tt.printing.Print('gene_factors sum')(gene_factors.sum(0).shape)
            # tt.printing.Print('gene_factors sum')(gene_factors.sum(0))

            # =====================Spot factors======================= #
            # The prior on spot factors reflects the number of cells, the fraction of their cytoplasm captured,
            # times the heterogeneity in total mRNA content between individual cells within each cell type
            self.cells_per_spot = pm.Gamma('cells_per_spot',
                                           mu=cell_number_prior['cells_per_spot'],
                                           sigma=np.sqrt(cell_number_prior['cells_per_spot'] \
                                                         / cell_number_prior['cells_mean_var_ratio']),
                                           shape=(1, 1))
            self.factors_per_spot = pm.Gamma('factors_per_spot',
                                             mu=cell_number_prior['factors_per_spot'],
                                             sigma=np.sqrt(cell_number_prior['factors_per_spot'] \
                                                           / cell_number_prior['factors_mean_var_ratio']),
                                             shape=(1, 1))

            shape = self.factors_per_spot / np.array(self.n_fact).reshape(
                (1, 1))
            rate = tt.ones(
                (1, 1)) / self.cells_per_spot * self.factors_per_spot
            self.spot_factors = pm.Gamma('spot_factors',
                                         alpha=shape,
                                         beta=rate,
                                         shape=(self.n_obs, self.n_fact))

            # =====================Spot-specific additive component======================= #
            # molecule contribution that cannot be explained by cell state signatures
            # these counts are distributed between all genes not just expressed genes
            self.spot_add_hyp = pm.Gamma('spot_add_hyp', 1, 1, shape=2)
            self.spot_add = pm.Gamma('spot_add',
                                     self.spot_add_hyp[0],
                                     self.spot_add_hyp[1],
                                     shape=(self.n_obs, 1))

            # =====================Gene-specific additive component ======================= #
            # per gene molecule contribution that cannot be explained by cell state signatures
            # these counts are distributed equally between all spots (e.g. background, free-floating RNA)
            self.gene_add_hyp = pm.Gamma('gene_add_hyp', 1, 1, shape=2)
            self.gene_add = pm.Gamma('gene_add',
                                     self.gene_add_hyp[0],
                                     self.gene_add_hyp[1],
                                     shape=(self.n_exper, self.n_var))

            # =====================Gene-specific overdispersion ======================= #
            self.phi_hyp = pm.Gamma('phi_hyp',
                                    mu=phi_hyp_prior['mean'],
                                    sigma=phi_hyp_prior['sd'],
                                    shape=(1, 1))
            self.gene_E = pm.Exponential('gene_E',
                                         self.phi_hyp,
                                         shape=(self.n_exper, self.n_var))

            # =====================Expected expression ======================= #
            # expected expression
            self.mu_biol = pm.math.dot(self.spot_factors, self.gene_factors.T) \
                           * self.gene_level.T \
                           + pm.math.dot(self.extra_data_tt['spot2sample'], self.gene_add) + self.spot_add
            # tt.printing.Print('mu_biol')(self.mu_biol.shape)

            # =====================DATA likelihood ======================= #
            # Likelihood (sampling distribution) of observations & add overdispersion via NegativeBinomial / Poisson
            self.data_target = pm.NegativeBinomial(
                'data_target',
                mu=self.mu_biol,
                alpha=pm.math.dot(self.extra_data_tt['spot2sample'],
                                  1 / tt.pow(self.gene_E, 2)),
                observed=self.x_data,
                total_size=self.X_data.shape)
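            # alpha = 1 / gene_E**2 converts the per-gene overdispersion scale
            # gene_E into the Negative Binomial dispersion parameter; the dot
            # with the spot2sample indicator matrix selects each spot's
            # experiment-specific value.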

            # =====================Compute nUMI from each factor in spots  ======================= #
            self.nUMI_factors = pm.Deterministic(
                'nUMI_factors', (self.spot_factors *
                                 (self.gene_factors * self.gene_level).sum(0)))
Example #52
0
def inner_fn(t, stm1, oat, ot, oht, pos, vt,\
r_Wq_hst_ot, r_Wq_hst_oht, r_Wq_hst_oat, r_Wq_hst_stm1, r_bq_hst,\
r_Wq_hst2_hst, r_bq_hst2,\
r_Wq_stmu_hst2, r_bq_stmu,\
r_Wq_stsig_hst2, r_bq_stsig,\
r_Wl_stmu_stm1, r_bl_stmu,\
r_Wl_stsig_stm1, r_bl_stsig,\
r_Wl_ost_st, r_bl_ost,\
r_Wl_ost2_ost, r_bl_ost2,\
r_Wl_ost3_ost2, r_bl_ost3,\
r_Wl_otmu_st, r_bl_otmu,\
r_Wl_otsig_st, r_bl_otsig,\
r_Wl_ohtmu_st, r_bl_ohtmu,\
r_Wl_ohtsig_st, r_bl_ohtsig,\
r_Wl_oatmu_st, r_bl_oatmu,\
r_Wl_oatsig_st, r_bl_oatsig,\
r_Wa_atmu_st, r_ba_atmu,\
r_Wa_atsig_st, r_ba_atsig\
):

    hst = T.nnet.relu(
        T.batched_tensordot(r_Wq_hst_stm1,
                            T.reshape(stm1, (n_perturbations, n_s, n_proc)),
                            axes=[[2], [1]]) +
        T.batched_tensordot(r_Wq_hst_ot,
                            T.reshape(ot, (n_perturbations, n_o, n_proc)),
                            axes=[[2], [1]]) +
        T.batched_tensordot(r_Wq_hst_oht,
                            T.reshape(oht, (n_perturbations, n_oh, n_proc)),
                            axes=[[2], [1]]) +
        T.batched_tensordot(r_Wq_hst_oat,
                            T.reshape(oat, (n_perturbations, n_oa, n_proc)),
                            axes=[[2], [1]]) + r_bq_hst)
    hst2 = T.nnet.relu(
        T.batched_tensordot(r_Wq_hst2_hst,
                            T.reshape(hst, (n_perturbations, n_s, n_proc)),
                            axes=[[2], [1]]) + r_bq_hst2)

    stmu = T.tanh(
        T.batched_tensordot(r_Wq_stmu_hst2,
                            T.reshape(hst2, (n_perturbations, n_s, n_proc)),
                            axes=[[2], [1]]) + r_bq_stmu)
    stsig = T.nnet.softplus(
        T.batched_tensordot(r_Wq_stsig_hst2,
                            T.reshape(hst2, (n_perturbations, n_s, n_proc)),
                            axes=[[2], [1]]) + r_bq_stsig) + sig_min_states

    # Rescale representation to fit within linear response of the tanh-nonlinearity
    stmu = T.set_subtensor(stmu[:, 0, :], 0.1 * ot[:, 0, :]).reshape(
        (n_perturbations, n_s, n_proc))
    stsig = T.set_subtensor(stsig[:, 0, :], 0.01).reshape(
        (n_perturbations, n_s, n_proc))

    st = stmu + theano_rng.normal((n_perturbations, n_s, n_proc)) * stsig

    ost = T.nnet.relu(
        T.batched_tensordot(r_Wl_ost_st,
                            T.reshape(st, (n_perturbations, n_s, n_proc)),
                            axes=[[2], [1]]) + r_bl_ost)
    ost2 = T.nnet.relu(
        T.batched_tensordot(r_Wl_ost2_ost,
                            T.reshape(ost, (n_perturbations, n_s, n_proc)),
                            axes=[[2], [1]]) + r_bl_ost2)
    ost3 = T.nnet.relu(
        T.batched_tensordot(r_Wl_ost3_ost2,
                            T.reshape(ost2, (n_perturbations, n_s, n_proc)),
                            axes=[[2], [1]]) + r_bl_ost3)

    otmu = T.batched_tensordot(r_Wl_otmu_st,
                               T.reshape(ost3, (n_perturbations, n_s, n_proc)),
                               axes=[[2], [1]]) + r_bl_otmu
    otsig = T.nnet.softplus(
        T.batched_tensordot(r_Wl_otsig_st,
                            T.reshape(ost3, (n_perturbations, n_s, n_proc)),
                            axes=[[2], [1]]) + r_bl_otsig) + sig_min_obs

    ohtmu = T.batched_tensordot(r_Wl_ohtmu_st,
                                T.reshape(ost3,
                                          (n_perturbations, n_s, n_proc)),
                                axes=[[2], [1]]) + r_bl_ohtmu
    ohtsig = T.nnet.softplus(
        T.batched_tensordot(r_Wl_ohtsig_st,
                            T.reshape(ost3, (n_perturbations, n_s, n_proc)),
                            axes=[[2], [1]]) + r_bl_ohtsig) + sig_min_obs

    oatmu = T.batched_tensordot(r_Wl_oatmu_st,
                                T.reshape(ost3,
                                          (n_perturbations, n_s, n_proc)),
                                axes=[[2], [1]]) + r_bl_oatmu
    oatsig = T.nnet.softplus(
        T.batched_tensordot(r_Wl_oatsig_st,
                            T.reshape(ost3, (n_perturbations, n_s, n_proc)),
                            axes=[[2], [1]]) + r_bl_oatsig) + sig_min_obs

    p_ot = GaussianNLL(ot, otmu, otsig)
    p_oht = GaussianNLL(oht, ohtmu, ohtsig)
    p_oat = GaussianNLL(oat, oatmu, oatsig)

    prior_stmu = T.tanh(
        T.batched_tensordot(r_Wl_stmu_stm1,
                            T.reshape(stm1, (n_perturbations, n_s, n_proc)),
                            axes=[[2], [1]]) + r_bl_stmu)
    prior_stsig = T.nnet.softplus(
        T.batched_tensordot(r_Wl_stsig_stm1,
                            T.reshape(stm1, (n_perturbations, n_s, n_proc)),
                            axes=[[2], [1]]) + r_bl_stsig) + sig_min_states

    prior_stmu = ifelse(T.lt(t, 20), prior_stmu,
                        T.set_subtensor(prior_stmu[:, 0, :], 0.1))
    prior_stsig = ifelse(T.lt(t, 20), prior_stsig,
                         T.set_subtensor(prior_stsig[:, 0, :], 0.01))

    KL_st = KLGaussianGaussian(stmu, stsig, prior_stmu, prior_stsig)

    FEt = KL_st + p_ot + p_oht + p_oat

    oat_mu = T.batched_tensordot(r_Wa_atmu_st,
                                 T.reshape(st, (n_perturbations, n_s, n_proc)),
                                 axes=[[2], [1]]) + r_ba_atmu
    oat_sig = T.nnet.softplus(
        T.batched_tensordot(r_Wa_atsig_st,
                            T.reshape(st, (n_perturbations, n_s, n_proc)),
                            axes=[[2], [1]]) + r_ba_atsig) + sig_min_action

    oat_new = 0.0 * oat + oat_mu + theano_rng.normal(
        (n_perturbations, n_oa, n_proc)) * oat_sig

    action_force = T.tanh(oat_new)
    force = T.switch(
        T.lt(pos, 0.0), -2 * pos - 1, -T.pow(1 + 5 * T.sqr(pos), -0.5) -
        T.sqr(pos) * T.pow(1 + 5 * T.sqr(pos), -1.5) -
        T.pow(pos, 4) / 16.0) - 0.25 * vt
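    # Position-dependent force (different expressions on either side of pos = 0)
    # plus velocity damping (-0.25 * vt); the agent's action enters the velocity
    # update below through the 0.03 * action_force term.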
    vt_new = vt + 0.05 * force + 0.03 * action_force
    pos_new = pos + vt_new

    ot_new = pos_new + theano_rng.normal((n_perturbations, n_o, n_proc)) * 0.01

    oht_new = T.exp(-T.sqr(pos_new - 1.0) / 2.0 / 0.3 / 0.3)

    return st, oat_new, ot_new, oht_new, pos_new, vt_new, FEt, KL_st, hst, hst2, stmu, stsig, force, p_ot, p_oht, p_oat
Example #53
0
def expr_generator(a, b):
    ra = [T.pow(a[i], i) for i in range(len(a))]
    return ra, T.exp(b)
Example #54
0
    if CONTINUE:
        logger.info('Setting model weights from epoch {}'.format(max_epoch))
        param_values = pickle.load(open(param_values_file, 'rb'))
        lasagne.layers.set_all_param_values(l_out, param_values['recurrent'])
        lasagne.layers.set_all_param_values(resnet['pool5'],
                                            param_values['resnet'])

    logger.info('Creating output and loss variables')
    prediction = lasagne.layers.get_output(l_out, deterministic=False)
    flat_cap_out_var = T.flatten(cap_out_var, outdim=1)
    loss = T.sum(
        lasagne.objectives.categorical_crossentropy(prediction,
                                                    flat_cap_out_var))
    caption_features = lasagne.layers.get_output(l_slice, deterministic=False)
    order_embedding_loss = T.pow(T.maximum(0, caption_features - im_features),
                                 2).sum()
    total_loss = loss + ORDER_VIOLATION_COEFF * order_embedding_loss

    deterministic_prediction = lasagne.layers.get_output(l_out,
                                                         deterministic=True)
    deterministic_captions = lasagne.layers.get_output(l_slice,
                                                       deterministic=True)
    deterministic_loss = T.sum(
        lasagne.objectives.categorical_crossentropy(deterministic_prediction,
                                                    flat_cap_out_var))
    deterministic_order_embedding_loss = T.pow(
        T.maximum(0, deterministic_captions - im_features), 2).sum()
    deterministic_total_loss = deterministic_loss + ORDER_VIOLATION_COEFF * deterministic_order_embedding_loss

    logger.info('Getting all parameters and creating update rules.')
    resnet_params = lasagne.layers.get_all_params(resnet['pool5'],
Example #55
0
    def __init__(self,
                 nnet,
                 dataset=None,
                 learning_rate=0.01,
                 beta=0.0,
                 sparsity=0.01,
                 weight_decay=0.0,
                 momentum=0.5):
        if len(dataset) < 2:
            raise ValueError(
                "dataset must contain the tuple (train_data, train_target)")
        train_data, train_target = dataset

        target = T.matrix('y')

        square_error = T.mean(0.5 *
                              T.sum(T.pow(target - nnet.output, 2), axis=1))

        avg_activate = T.mean(nnet.hiddenLayer[0].output, axis=0)
        sparsity_penalty = beta * T.sum(
            T.mul(T.log(sparsity / avg_activate), sparsity) +
            T.mul(T.log((1 - sparsity) / T.sub(1, avg_activate)),
                  (1 - sparsity)))
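        # beta-weighted KL divergence between the target sparsity rate and the
        # mean hidden activation, summed over hidden units (the usual sparse
        # autoencoder penalty).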

        regularization = 0.5 * weight_decay * (
            T.sum(T.pow(nnet.params[0], 2)) + T.sum(T.pow(nnet.params[2], 2)))

        cost = square_error + sparsity_penalty + regularization

        gparams = [T.grad(cost, param) for param in nnet.params]

        w_deltas = []
        for param in nnet.params:
            w_deltas.append(
                theano.shared(value=param.get_value() * 0, borrow=True))

        new_params = [
            param - (learning_rate * gparam + momentum * w_delta)
            for param, gparam, w_delta in zip(nnet.params, gparams, w_deltas)
        ]

        updates = [(param, new_param)
                   for param, new_param in zip(nnet.params, new_params)]
        updates += [(w_delta, learning_rate * gparam + momentum * w_delta)
                    for w_delta, gparam in zip(w_deltas, gparams)]

        index = T.lscalar()
        self.train = theano.function(
            inputs=[index],
            outputs=cost,
            updates=updates,
            givens={
                input: train_data[index * batch_size:(index + 1) * batch_size],
                target:
                train_target[index * batch_size:(index + 1) * batch_size]
            })

        self.cost = theano.function(inputs=[],
                                    outputs=cost,
                                    givens={
                                        input: train_data,
                                        target: train_target
                                    })
    def __init__(self, model, n_in, n_out, state_bounds, action_bounds,
                 reward_bound, settings_):

        super(Distillation,
              self).__init__(model, n_in, n_out, state_bounds, action_bounds,
                             reward_bound, settings_)

        # create a small convolutional neural network

        ### Load expert policy files
        self._expert_policies = []
        file_name_ = ""
        for i in range(len(self.getSettings()['expert_policy_files'])):
            file_name = self.getSettings(
            )['expert_policy_files'][i] + '/' + self.getSettings(
            )['model_type'] + '/' + getAgentName() + '.pkl'
            if (file_name_ == file_name):
                ## To help save memory when experts are the same
                # model_ = self._expert_policies[len(self._expert_policies)-1]
                self._expert_policies.append(model_)
            else:
                print("Loading pre compiled network: ", file_name)
                f = open(file_name, 'rb')
                model_ = dill.load(f)
                # model.setSettings(settings)
                f.close()
                self._expert_policies.append(model_)
            file_name_ = file_name

        self._actor_buffer_states = []
        self._actor_buffer_result_states = []
        self._actor_buffer_actions = []
        self._actor_buffer_rewards = []
        self._actor_buffer_falls = []
        self._actor_buffer_diff = []

        self._NotFallen = T.bcol("Not_Fallen")
        ## float32 * int32 promotes to float64, so use int16 or int8 to keep the result in float32
        self._NotFallen.tag.test_value = np.zeros((self._batch_size, 1),
                                                  dtype=np.dtype('int8'))

        self._NotFallen_shared = theano.shared(np.zeros((self._batch_size, 1),
                                                        dtype='int8'),
                                               broadcastable=(False, True))

        self._tmp_diff = T.col("Tmp_Diff")
        self._tmp_diff.tag.test_value = np.zeros(
            (self._batch_size, 1),
            dtype=np.dtype(self.getSettings()['float_type']))

        self._tmp_diff_shared = theano.shared(np.zeros(
            (self._batch_size, 1), dtype=self.getSettings()['float_type']),
                                              broadcastable=(False, True))
        """
        self._target_shared = theano.shared(
            np.zeros((self._batch_size, 1), dtype='float64'),
            broadcastable=(False, True))
        """
        self._critic_regularization_weight = self.getSettings(
        )["critic_regularization_weight"]
        self._critic_learning_rate = self.getSettings()["critic_learning_rate"]
        ## Target network
        self._modelTarget = copy.deepcopy(model)

        self._q_valsA = lasagne.layers.get_output(
            self._model.getCriticNetwork(),
            self._model.getStateSymbolicVariable(),
            deterministic=True)
        self._q_valsA_drop = lasagne.layers.get_output(
            self._model.getCriticNetwork(),
            self._model.getStateSymbolicVariable(),
            deterministic=False)
        self._q_valsNextState = lasagne.layers.get_output(
            self._model.getCriticNetwork(),
            self._model.getResultStateSymbolicVariable(),
            deterministic=True)
        self._q_valsTargetNextState = lasagne.layers.get_output(
            self._modelTarget.getCriticNetwork(),
            self._model.getResultStateSymbolicVariable(),
            deterministic=True)
        self._q_valsTarget = lasagne.layers.get_output(
            self._modelTarget.getCriticNetwork(),
            self._model.getStateSymbolicVariable(),
            deterministic=True)
        self._q_valsTarget_drop = lasagne.layers.get_output(
            self._modelTarget.getCriticNetwork(),
            self._model.getStateSymbolicVariable(),
            deterministic=False)

        self._q_valsActA = lasagne.layers.get_output(
            self._model.getActorNetwork(),
            self._model.getStateSymbolicVariable(),
            deterministic=True)
        self._q_valsActTarget = lasagne.layers.get_output(
            self._modelTarget.getActorNetwork(),
            self._model.getStateSymbolicVariable(),
            deterministic=True)
        self._q_valsActA_drop = lasagne.layers.get_output(
            self._model.getActorNetwork(),
            self._model.getStateSymbolicVariable(),
            deterministic=False)

        self._q_func = self._q_valsA
        self._q_funcTarget = self._q_valsTarget
        self._q_func_drop = self._q_valsA_drop
        self._q_funcTarget_drop = self._q_valsTarget_drop
        self._q_funcAct = self._q_valsActA
        self._q_funcAct_drop = self._q_valsActA_drop

        # self._target = (self._model.getRewardSymbolicVariable() + (np.array([self._discount_factor] ,dtype=np.dtype(self.getSettings()['float_type']))[0] * self._q_valsTargetNextState )) * self._NotFallen
        # self._target = self._model.getRewardSymbolicVariable() + ((self._discount_factor * self._q_valsTargetNextState ) * self._NotFallen) + (self._NotFallen - 1)
        self._target = self._model.getRewardSymbolicVariable() + (
            self._discount_factor * self._q_valsTargetNextState)
        self._diff = self._target - self._q_func
        self._diff_drop = self._target - self._q_func_drop
        # loss = 0.5 * self._diff ** 2
        loss = T.pow(self._diff, 2)
        self._loss = T.mean(loss)
        self._loss_drop = T.mean(0.5 * self._diff_drop**2)

        self._params = lasagne.layers.helper.get_all_params(
            self._model.getCriticNetwork())
        self._actionParams = lasagne.layers.helper.get_all_params(
            self._model.getActorNetwork())
        self._givens_ = {
            self._model.getStateSymbolicVariable():
            self._model.getStates(),
            self._model.getResultStateSymbolicVariable():
            self._model.getResultStates(),
            self._model.getRewardSymbolicVariable():
            self._model.getRewards(),
            # self._NotFallen: self._NotFallen_shared
            # self._model.getActionSymbolicVariable(): self._actions_shared,
        }
        self._actGivens = {
            self._model.getStateSymbolicVariable():
            self._model.getStates(),
            # self._model.getResultStateSymbolicVariable(): self._model.getResultStates(),
            # self._model.getRewardSymbolicVariable(): self._model.getRewards(),
            self._model.getActionSymbolicVariable():
            self._model.getActions(),
            # self._NotFallen: self._NotFallen_shared
            self._tmp_diff:
            self._tmp_diff_shared
        }

        self._critic_regularization = (
            self._critic_regularization_weight *
            lasagne.regularization.regularize_network_params(
                self._model.getCriticNetwork(), lasagne.regularization.l2))
        self._actor_regularization = (
            (self._regularization_weight *
             lasagne.regularization.regularize_network_params(
                 self._model.getActorNetwork(), lasagne.regularization.l2)))
        if (self.getSettings()['use_previous_value_regularization']):
            self._actor_regularization = self._actor_regularization + (
                (self.getSettings()['previous_value_regularization_weight']) *
                change_penalty(self._model.getActorNetwork(),
                               self._modelTarget.getActorNetwork()))
        elif ('regularization_type' in self.getSettings() and
              (self.getSettings()['regularization_type'] == 'KL_Divergence')):
            self._kl_firstfixed = T.mean(
                kl(
                    self._q_valsActTarget,
                    T.ones_like(self._q_valsActTarget) *
                    self.getSettings()['exploration_rate'], self._q_valsActA,
                    T.ones_like(self._q_valsActA) *
                    self.getSettings()['exploration_rate'],
                    self._action_length))
            #self._actor_regularization = (( self._KL_Weight ) * self._kl_firstfixed ) + (10*(self._kl_firstfixed>self.getSettings()['kl_divergence_threshold'])*
            #                                                                         T.square(self._kl_firstfixed-self.getSettings()['kl_divergence_threshold']))
            self._actor_regularization = (self._kl_firstfixed) * (
                self.getSettings()['kl_divergence_threshold'])

            print("Using regularization type : ",
                  self.getSettings()['regularization_type'])
        # SGD update
        # self._updates_ = lasagne.updates.rmsprop(self._loss, self._params, self._learning_rate, self._rho,
        #                                    self._rms_epsilon)
        self._value_grad = T.grad(self._loss + self._critic_regularization,
                                  self._params)
        ## Clipping the max gradient
        """
        for x in range(len(self._value_grad)): 
            self._value_grad[x] = T.clip(self._value_grad[x] ,  -0.1, 0.1)
        """
        if (self.getSettings()['optimizer'] == 'rmsprop'):
            print("Optimizing Value Function with ",
                  self.getSettings()['optimizer'], " method")
            self._updates_ = lasagne.updates.rmsprop(self._value_grad,
                                                     self._params,
                                                     self._learning_rate,
                                                     self._rho,
                                                     self._rms_epsilon)
        elif (self.getSettings()['optimizer'] == 'momentum'):
            print("Optimizing Value Function with ",
                  self.getSettings()['optimizer'], " method")
            self._updates_ = lasagne.updates.momentum(
                self._value_grad,
                self._params,
                self._critic_learning_rate,
                momentum=self._rho)
        elif (self.getSettings()['optimizer'] == 'adam'):
            print("Optimizing Value Function with ",
                  self.getSettings()['optimizer'], " method")
            self._updates_ = lasagne.updates.adam(self._value_grad,
                                                  self._params,
                                                  self._critic_learning_rate,
                                                  beta1=0.9,
                                                  beta2=0.9,
                                                  epsilon=self._rms_epsilon)
        elif (self.getSettings()['optimizer'] == 'adagrad'):
            print("Optimizing Value Function with ",
                  self.getSettings()['optimizer'], " method")
            self._updates_ = lasagne.updates.adagrad(
                self._value_grad,
                self._params,
                self._critic_learning_rate,
                epsilon=self._rms_epsilon)
        else:
            print("Unknown optimization method: ",
                  self.getSettings()['optimizer'])
            sys.exit(-1)
        ## TD update
        """
        if (self.getSettings()['optimizer'] == 'rmsprop'):
            self._updates_ = lasagne.updates.rmsprop(T.mean(self._q_func) + self._critic_regularization, self._params, 
                        self._critic_learning_rate * -T.mean(self._diff), self._rho, self._rms_epsilon)
        elif (self.getSettings()['optimizer'] == 'momentum'):
            self._updates_ = lasagne.updates.momentum(T.mean(self._q_func) + self._critic_regularization, self._params, 
                        self._critic_learning_rate * -T.mean(self._diff), momentum=self._rho)
        elif ( self.getSettings()['optimizer'] == 'adam'):
            self._updates_ = lasagne.updates.adam(T.mean(self._q_func), self._params, 
                        self._critic_learning_rate * -T.mean(self._diff), beta1=0.9, beta2=0.999, epsilon=1e-08)
        else:
            print ("Unknown optimization method: ", self.getSettings()['optimizer'])
            sys.exit(-1)
        """
        ## Need to perform an element wise operation or replicate _diff for this to work properly.
        # self._actDiff = theano.tensor.elemwise.Elemwise(theano.scalar.mul)((self._model.getActionSymbolicVariable() - self._q_valsActA), theano.tensor.tile((self._diff * (1.0/(1.0-self._discount_factor))), self._action_length)) # Target network does not work well here?
        self._actDiff = (self._model.getActionSymbolicVariable() -
                         self._q_valsActA_drop)
        # self._actDiff = ((self._model.getActionSymbolicVariable() - self._q_valsActA)) # Target network does not work well here?
        # self._actDiff_drop = ((self._model.getActionSymbolicVariable() - self._q_valsActA_drop)) # Target network does not work well here?
        ## This should be a single column vector
        # self._actLoss_ = theano.tensor.elemwise.Elemwise(theano.scalar.mul)(( T.transpose(T.sum(T.pow(self._actDiff, 2),axis=1) )), (self._diff * (1.0/(1.0-self._discount_factor))))
        # self._actLoss_ = theano.tensor.elemwise.Elemwise(theano.scalar.mul)(( T.reshape(T.sum(T.pow(self._actDiff, 2),axis=1), (self._batch_size, 1) )),
        #                                                                        (self._tmp_diff * (1.0/(1.0-self._discount_factor)))
        # self._actLoss_ = (T.mean(T.pow(self._actDiff, 2),axis=1))

        self._actLoss_ = theano.tensor.elemwise.Elemwise(theano.scalar.mul)(
            (T.mean(T.pow(self._actDiff, 2), axis=1)), (self._tmp_diff))
        # self._actLoss = T.sum(self._actLoss)/float(self._batch_size)
        self._actLoss = T.mean(self._actLoss_)
        # self._actLoss_drop = (T.sum(0.5 * self._actDiff_drop ** 2)/float(self._batch_size)) # because the number of rows can shrink
        # self._actLoss_drop = (T.mean(0.5 * self._actDiff_drop ** 2))
        self._policy_grad = T.grad(self._actLoss + self._actor_regularization,
                                   self._actionParams)
        ## Clipping the max gradient
        """
        for x in range(len(self._policy_grad)): 
            self._policy_grad[x] = T.clip(self._policy_grad[x] ,  -0.5, 0.5)
        """
        if (self.getSettings()['optimizer'] == 'rmsprop'):
            self._actionUpdates = lasagne.updates.rmsprop(
                self._policy_grad, self._actionParams, self._learning_rate,
                self._rho, self._rms_epsilon)
        elif (self.getSettings()['optimizer'] == 'momentum'):
            self._actionUpdates = lasagne.updates.momentum(self._policy_grad,
                                                           self._actionParams,
                                                           self._learning_rate,
                                                           momentum=self._rho)
        elif (self.getSettings()['optimizer'] == 'adam'):
            self._actionUpdates = lasagne.updates.adam(
                self._policy_grad,
                self._actionParams,
                self._learning_rate,
                beta1=0.9,
                beta2=0.999,
                epsilon=self._rms_epsilon)
        elif (self.getSettings()['optimizer'] == 'adagrad'):
            self._actionUpdates = lasagne.updates.adagrad(
                self._policy_grad,
                self._actionParams,
                self._learning_rate,
                epsilon=self._rms_epsilon)
        else:
            print("Unknown optimization method: ",
                  self.getSettings()['optimizer'])

        # actionUpdates = lasagne.updates.rmsprop(T.mean(self._q_funcAct_drop) +
        #   (self._regularization_weight * lasagne.regularization.regularize_network_params(
        #       self._model.getActorNetwork(), lasagne.regularization.l2)), actionParams,
        #           self._learning_rate * 0.5 * (-T.sum(actDiff_drop)/float(self._batch_size)), self._rho, self._rms_epsilon)
        self._givens_grad = {
            self._model.getStateSymbolicVariable(): self._model.getStates(),
            # self._model.getResultStateSymbolicVariable(): self._model.getResultStates(),
            # self._model.getRewardSymbolicVariable(): self._model.getRewards(),
            # self._model.getActionSymbolicVariable(): self._model.getActions(),
        }

        ### Noisy state updates
        # self._target = (self._model.getRewardSymbolicVariable() + (np.array([self._discount_factor] ,dtype=np.dtype(self.getSettings()['float_type']))[0] * self._q_valsTargetNextState )) * self._NotFallen
        # self._target_dyna = theano.gradient.disconnected_grad(self._q_func)

        ## Bellman error
        self._bellman = self._target - self._q_funcTarget

        # self._target = self._model.getRewardSymbolicVariable() +  (self._discount_factor * self._q_valsTargetNextState )
        ### Give v(s') the next state and v(s) (target) the current state
        self._diff_adv = (self._discount_factor *
                          self._q_func) - (self._q_valsTargetNextState)
        self._diff_adv_givens = {
            self._model.getStateSymbolicVariable():
            self._model.getResultStates(),
            self._model.getResultStateSymbolicVariable():
            self._model.getStates(),
        }

        Distillation.compile(self)
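As a quick illustration of the TD target and squared-error loss that drive the value-function update above, here is a minimal numpy sketch with made-up numbers (not part of the original example):

import numpy as np

gamma = 0.99
rewards = np.array([[1.0], [0.5]])
v_s = np.array([[2.0], [1.5]])       # critic estimate V(s)
v_s_next = np.array([[1.8], [1.2]])  # target-network estimate V(s')

target = rewards + gamma * v_s_next  # corresponds to self._target
td_error = target - v_s              # corresponds to self._diff
loss = np.mean(td_error ** 2)        # corresponds to self._loss
print(loss)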
Example #57
0
    def __init__(self, signal_shape, filter_shape, poolsize, activation=None):
        rng = np.random.RandomState(None)
        dtensor5 = T.TensorType('float32', (False, ) * 5)
        self.inputs = dtensor5(
            name='inputs')  # symbolic variable for a 5-dimensional ndarray
        self.image_shape = signal_shape
        self.batchsize = signal_shape[0]
        self.in_channels = signal_shape[2]
        self.in_depth = signal_shape[1]
        self.in_width = signal_shape[4]
        self.in_height = signal_shape[3]
        self.flt_channels = filter_shape[0]
        self.flt_time = filter_shape[1]
        self.flt_width = filter_shape[4]
        self.flt_height = filter_shape[3]
        self.activation = activation

        self.hidden_layer = ConvolutionLayer3D(rng,
                                               input=self.inputs,
                                               signal_shape=signal_shape,
                                               filter_shape=filter_shape,
                                               act=activation,
                                               border_mode='full',
                                               if_hidden_pool=False)

        self.hidden_image_shape = (self.batchsize, self.in_depth,
                                   self.flt_channels,
                                   self.in_height + self.flt_height - 1,
                                   self.in_width + self.flt_width - 1)

        self.hidden_pooled_image_shape = (
            self.batchsize, self.in_depth // 2, self.flt_channels,
            (self.in_height + self.flt_height - 1) // 2,
            (self.in_width + self.flt_width - 1) // 2)

        self.hidden_filter_shape = (self.in_channels, self.flt_time,
                                    self.flt_channels, self.flt_height,
                                    self.flt_width)

        self.recon_layer = ConvolutionLayer3D(
            rng,
            input=self.hidden_layer.output,
            signal_shape=self.hidden_image_shape,
            filter_shape=self.hidden_filter_shape,
            act=activation,
            border_mode='valid')

        self.layers = [self.hidden_layer, self.recon_layer]
        self.params = sum([layer.params for layer in self.layers], [])
        L = T.sum(T.pow(T.sub(self.recon_layer.output, self.inputs), 2),
                  axis=(1, 2, 3, 4))
        self.cost = 0.5 * T.mean(L)
        self.grads = T.grad(self.cost, self.params)
        self.updates = adadelta_updates(self.params,
                                        self.grads,
                                        rho=0.95,
                                        eps=1e-6)

        self.train = theano.function([self.inputs],
                                     self.cost,
                                     updates=self.updates,
                                     name="train cae model")

        # NOTE: this reassigns self.activation (set to the activation function
        # above) to the pooled output of the hidden layer.
        self.activation = theano.tensor.signal.pool.pool_3d(
            input=self.hidden_layer.output.dimshuffle(0, 2, 1, 3, 4),
            ds=poolsize,
            ignore_border=True)
        self.activation = self.activation.dimshuffle(0, 2, 1, 3, 4)
        self.get_activation = theano.function([self.inputs],
                                              self.activation,
                                              updates=None,
                                              name='get hidden activation')
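The adadelta_updates helper called above is not shown in this example. A minimal sketch of what it might look like, assuming the standard Adadelta update rule (rho and eps match the call above; the implementation here is an assumption, not the original code):

from collections import OrderedDict
import numpy as np
import theano
import theano.tensor as T

def adadelta_updates(params, grads, rho=0.95, eps=1e-6):
    # One accumulator for squared gradients and one for squared steps per parameter.
    updates = OrderedDict()
    for p, g in zip(params, grads):
        acc_grad = theano.shared(np.zeros_like(p.get_value()),
                                 broadcastable=p.broadcastable)
        acc_delta = theano.shared(np.zeros_like(p.get_value()),
                                  broadcastable=p.broadcastable)
        new_acc_grad = rho * acc_grad + (1 - rho) * g ** 2
        step = g * T.sqrt(acc_delta + eps) / T.sqrt(new_acc_grad + eps)
        new_acc_delta = rho * acc_delta + (1 - rho) * step ** 2
        updates[acc_grad] = new_acc_grad
        updates[acc_delta] = new_acc_delta
        updates[p] = p - step
    return updates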
Example #58
0
    def __init__(self, model, n_in, n_out, state_bounds, action_bounds,
                 reward_bound, settings_):

        super(Distillation,
              self).__init__(model, n_in, n_out, state_bounds, action_bounds,
                             reward_bound, settings_)

        # create a small convolutional neural network

        ### Load expert policy files
        self._expert_policies = []
        file_name_ = ""
        for i in range(len(self.getSettings()['expert_policy_files'])):
            file_name = (self.getSettings()['expert_policy_files'][i] + '/' +
                         self.getSettings()['model_type'] + '/' +
                         getAgentName() + '.pkl')
            if (file_name_ == file_name):
                ## To help save memory when experts are the same
                self._expert_policies.append(model_)
            else:
                print("Loading pre compiled network: ", file_name)
                f = open(file_name, 'rb')
                model_ = dill.load(f)
                f.close()
                self._expert_policies.append(
                    model_)  # expert model, load the 2 expert models
            file_name_ = file_name

        self._actor_buffer_states = []
        self._actor_buffer_result_states = []
        self._actor_buffer_actions = []
        self._actor_buffer_rewards = []
        self._actor_buffer_falls = []
        self._actor_buffer_diff = []

        self._NotFallen = T.bcol("Not_Fallen")
        ## float32 * int32 promotes to float64, so use int16 or int8 here
        self._NotFallen.tag.test_value = np.zeros((self._batch_size, 1),
                                                  dtype=np.dtype('int8'))

        self._NotFallen_shared = theano.shared(np.zeros((self._batch_size, 1),
                                                        dtype='int8'),
                                               broadcastable=(False, True))

        self._tmp_diff = T.col("Tmp_Diff")
        self._tmp_diff.tag.test_value = np.zeros(
            (self._batch_size, 1),
            dtype=np.dtype(self.getSettings()['float_type']))

        self._tmp_diff_shared = theano.shared(
            np.zeros((self._batch_size, 1),
                     dtype=self.getSettings()['float_type']),
            broadcastable=(False, True))  # shared variable initialised to zeros

        self._critic_regularization_weight = self.getSettings(
        )["critic_regularization_weight"]
        self._critic_learning_rate = self.getSettings()["critic_learning_rate"]
        ## Target network
        self._modelTarget = copy.deepcopy(model)  # target model (the model to be updated)

        self._q_valsA = lasagne.layers.get_output(
            self._model.getCriticNetwork(),
            self._model.getStateSymbolicVariable(),
            deterministic=True)  # deterministic critic output for the current state
        self._q_valsA_drop = lasagne.layers.get_output(
            self._model.getCriticNetwork(),
            self._model.getStateSymbolicVariable(),
            deterministic=False)  # stochastic (dropout) critic output for the current state
        self._q_valsNextState = lasagne.layers.get_output(
            self._model.getCriticNetwork(),
            self._model.getResultStateSymbolicVariable(),
            deterministic=True)  # critic output for the next state
        self._q_valsTargetNextState = lasagne.layers.get_output(
            self._modelTarget.getCriticNetwork(),
            self._model.getResultStateSymbolicVariable(),
            deterministic=True)  # target model's output for the next state
        self._q_valsTarget = lasagne.layers.get_output(
            self._modelTarget.getCriticNetwork(),
            self._model.getStateSymbolicVariable(),
            deterministic=True)  # target model's output for the current state
        self._q_valsTarget_drop = lasagne.layers.get_output(
            self._modelTarget.getCriticNetwork(),
            self._model.getStateSymbolicVariable(),
            deterministic=False)  # target model's stochastic output for the current state

        self._q_valsActA = lasagne.layers.get_output(
            self._model.getActorNetwork(),
            self._model.getStateSymbolicVariable(),
            deterministic=True)
        self._q_valsActTarget = lasagne.layers.get_output(
            self._modelTarget.getActorNetwork(),
            self._model.getStateSymbolicVariable(),
            deterministic=True)  # deterministic, i.e. without dropout noise
        self._q_valsActA_drop = lasagne.layers.get_output(
            self._model.getActorNetwork(),
            self._model.getStateSymbolicVariable(),
            deterministic=False)  # actor output (with dropout)

        self._q_func = self._q_valsA
        self._q_funcTarget = self._q_valsTarget
        self._q_func_drop = self._q_valsA_drop
        self._q_funcTarget_drop = self._q_valsTarget_drop
        self._q_funcAct = self._q_valsActA
        self._q_funcAct_drop = self._q_valsActA_drop

        self._target = self._model.getRewardSymbolicVariable() + (
            self._discount_factor * self._q_valsTargetNextState)
        # self._model.getRewardSymbolicVariable() returns the rewards; getRewards() = self._rewards_shared, which starts at zero and is updated continuously
        self._diff = self._target - self._q_func
        self._diff_drop = self._target - self._q_func_drop  # TD target minus the critic's dropout output
        loss = T.pow(self._diff, 2)
        self._loss = T.mean(loss)  # mean squared TD error
        self._loss_drop = T.mean(0.5 * self._diff_drop**2)

        self._params = lasagne.layers.helper.get_all_params(
            self._model.getCriticNetwork())
        self._actionParams = lasagne.layers.helper.get_all_params(
            self._model.getActorNetwork())
        self._givens_ = {
            self._model.getStateSymbolicVariable():
            self._model.getStates(),
            self._model.getResultStateSymbolicVariable():
            self._model.getResultStates(),
            self._model.getRewardSymbolicVariable():
            self._model.getRewards()
        }
        self._actGivens = {
            self._model.getStateSymbolicVariable(): self._model.getStates(),
            self._model.getActionSymbolicVariable(): self._model.getActions(),
            self._tmp_diff: self._tmp_diff_shared
        }

        self._critic_regularization = (
            self._critic_regularization_weight *
            lasagne.regularization.regularize_network_params(
                self._model.getCriticNetwork(), lasagne.regularization.l2))
        self._actor_regularization = (
            (self._regularization_weight *
             lasagne.regularization.regularize_network_params(
                 self._model.getActorNetwork(), lasagne.regularization.l2)))
        if (self.getSettings()['use_previous_value_regularization']):
            self._actor_regularization = self._actor_regularization + (
                (self.getSettings()['previous_value_regularization_weight']) *
                change_penalty(self._model.getActorNetwork(),
                               self._modelTarget.getActorNetwork()))
        elif ('regularization_type' in self.getSettings() and
              (self.getSettings()['regularization_type'] == 'KL_Divergence')):
            self._kl_firstfixed = T.mean(
                kl(
                    self._q_valsActTarget,
                    T.ones_like(self._q_valsActTarget) *
                    self.getSettings()['exploration_rate'], self._q_valsActA,
                    T.ones_like(self._q_valsActA) *
                    self.getSettings()['exploration_rate'],
                    self._action_length))
            self._actor_regularization = (self._kl_firstfixed) * (
                self.getSettings()['kl_divergence_threshold'])

            print("Using regularization type : ",
                  self.getSettings()['regularization_type'])
        # SGD update
        self._value_grad = T.grad(self._loss + self._critic_regularization,
                                  self._params)
        if (self.getSettings()['optimizer'] == 'rmsprop'):
            print("Optimizing Value Function with ",
                  self.getSettings()['optimizer'], " method")
            self._updates_ = lasagne.updates.rmsprop(self._value_grad,
                                                     self._params,
                                                     self._learning_rate,
                                                     self._rho,
                                                     self._rms_epsilon)
        elif (self.getSettings()['optimizer'] == 'momentum'):
            print("Optimizing Value Function with ",
                  self.getSettings()['optimizer'], " method")
            self._updates_ = lasagne.updates.momentum(
                self._value_grad,
                self._params,
                self._critic_learning_rate,
                momentum=self._rho)
        elif (self.getSettings()['optimizer'] == 'adam'):
            print("Optimizing Value Function with ",
                  self.getSettings()['optimizer'], " method")
            self._updates_ = lasagne.updates.adam(self._value_grad,
                                                  self._params,
                                                  self._critic_learning_rate,
                                                  beta1=0.9,
                                                  beta2=0.9,
                                                  epsilon=self._rms_epsilon)
        elif (self.getSettings()['optimizer'] == 'adagrad'):
            print("Optimizing Value Function with ",
                  self.getSettings()['optimizer'], " method")
            self._updates_ = lasagne.updates.adagrad(
                self._value_grad,
                self._params,
                self._critic_learning_rate,
                epsilon=self._rms_epsilon)
        else:
            print("Unknown optimization method: ",
                  self.getSettings()['optimizer'])
            sys.exit(-1)
        ## TD update

        ## Need to perform an element wise operation or replicate _diff for this to work properly.
        self._actDiff = (self._model.getActionSymbolicVariable() -
                         self._q_valsActA_drop)  # expert action minus the actor's (dropout) output

        self._actLoss_ = theano.tensor.elemwise.Elemwise(theano.scalar.mul)(
            (T.mean(T.pow(self._actDiff, 2), axis=1)), (self._tmp_diff))
        self._actLoss = T.mean(self._actLoss_)
        self._policy_grad = T.grad(self._actLoss + self._actor_regularization,
                                   self._actionParams)
        ## Clipping the max gradient
        if (self.getSettings()['optimizer'] == 'rmsprop'):
            self._actionUpdates = lasagne.updates.rmsprop(
                self._policy_grad, self._actionParams, self._learning_rate,
                self._rho, self._rms_epsilon)
        elif (self.getSettings()['optimizer'] == 'momentum'):
            self._actionUpdates = lasagne.updates.momentum(self._policy_grad,
                                                           self._actionParams,
                                                           self._learning_rate,
                                                           momentum=self._rho)
        elif (self.getSettings()['optimizer'] == 'adam'):
            self._actionUpdates = lasagne.updates.adam(
                self._policy_grad,
                self._actionParams,
                self._learning_rate,
                beta1=0.9,
                beta2=0.999,
                epsilon=self._rms_epsilon)
        elif (self.getSettings()['optimizer'] == 'adagrad'):
            self._actionUpdates = lasagne.updates.adagrad(
                self._policy_grad,
                self._actionParams,
                self._learning_rate,
                epsilon=self._rms_epsilon)
        else:
            print("Unknown optimization method: ",
                  self.getSettings()['optimizer'])

        self._givens_grad = {
            self._model.getStateSymbolicVariable(): self._model.getStates()
        }

        ## Bellman error
        self._bellman = self._target - self._q_funcTarget

        ### Give v(s') the next state and v(s) (target) the current state
        self._diff_adv = (self._discount_factor * self._q_func) - (
            self._q_valsTargetNextState
        )  # discount_factor * critic output minus target critic's output for the next state
        self._diff_adv_givens = {
            self._model.getStateSymbolicVariable():
            self._model.getResultStates(),
            self._model.getResultStateSymbolicVariable():
            self._model.getStates(),
        }

        Distillation.compile(self)
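Distillation.compile itself is not part of this example. Presumably it builds the Theano training functions from the graphs constructed above, roughly along these lines (a hedged sketch; the attribute names _train and _trainActor are assumptions):

    def compile(self):
        # Value-function (critic) update: minimize the mean squared TD error.
        self._train = theano.function([], [self._loss],
                                      updates=self._updates_,
                                      givens=self._givens_)
        # Policy (actor) update: weighted regression towards the expert actions.
        self._trainActor = theano.function([], [self._actLoss],
                                           updates=self._actionUpdates,
                                           givens=self._actGivens)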
Example #59
0
def mse(x, y):
    """Row-wise sum of squared differences (a sum, not a mean, despite the name)."""
    return T.sum(T.pow(x - y, 2), axis=1)


def pow(x, a):
    """Elementwise power of a tensor."""
    return T.pow(x, a)
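A minimal usage sketch for mse, compiling it into a Theano function and evaluating it on made-up data:

import numpy as np
import theano
import theano.tensor as T

x = T.matrix('x')
y = T.matrix('y')
f = theano.function([x, y], mse(x, y))

a = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=theano.config.floatX)
b = np.array([[1.0, 1.0], [2.0, 2.0]], dtype=theano.config.floatX)
print(f(a, b))  # [1. 5.], the sum of squared differences along each row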