Example #1
def test_gpujoin_gpualloc():
    a = T.fmatrix('a')
    a_val = numpy.asarray(numpy.random.rand(4, 5), dtype='float32')
    b = T.fmatrix('b')
    b_val = numpy.asarray(numpy.random.rand(3, 5), dtype='float32')

    f = theano.function([a, b], T.join(0, T.zeros_like(a),T.ones_like(b)) + 4,
                        mode=mode_without_gpu)
    f_gpu = theano.function([a, b], T.join(0, T.zeros_like(a), T.ones_like(b)),
                            mode=mode_with_gpu)
    f_gpu2 = theano.function([a, b], T.join(0, T.zeros_like(a),
                                           T.ones_like(b)) + 4,
                             mode=mode_with_gpu)

    assert sum([node.op == T.alloc for node in f.maker.env.toposort()]) == 2
    assert sum([node.op == T.join for node in f.maker.env.toposort()]) == 1
    assert sum([node.op == B.gpu_alloc
                for node in f_gpu.maker.env.toposort()]) == 2
    assert sum([node.op == B.gpu_join
                for node in f_gpu.maker.env.toposort()]) == 1
    assert sum([node.op == B.gpu_alloc
                for node in f_gpu2.maker.env.toposort()]) == 2
    assert sum([node.op == B.gpu_join
                for node in f_gpu2.maker.env.toposort()]) == 1
    assert numpy.allclose(f(a_val, b_val), f_gpu2(a_val, b_val))
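A minimal standalone sketch (outside the test harness above) of what T.ones_like / T.zeros_like compute: tensors with the same shape and dtype as their argument, filled with ones or zeros.

import numpy
import theano
import theano.tensor as T

x = T.fmatrix('x')
f = theano.function([x], [T.ones_like(x), T.zeros_like(x)])
ones, zeros = f(numpy.zeros((2, 3), dtype='float32'))
# ones is a (2, 3) float32 array of 1.0, zeros a (2, 3) float32 array of 0.0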
Example #2
 def apply(self, input_vars):
     c = input_vars[0]
     if c.ndim == 1:
         ones = T.ones_like(c)
     else:
         ones = T.ones_like(c[:, 0])
     return -np.log(self.vec.num_types) * ones
Example #3
    def _cdf(self, para, X):
        '''
        '''
        z = self._z(para, X)
        b = para['b'].value
        d = para['d'].value
        s = para['s'].value

        b = b.dimshuffle(0, 'x')
        NU = TT.extra_ops.cumsum(
            TT.concatenate((b, TT.sqr(d)), axis=1),
            axis=1)

        NU = TT.concatenate(
            (-1e20 * TT.ones_like(b), NU, 1e20 * TT.ones_like(b)),
            axis=1)

        NU = NU.dimshuffle('x', 0, 1)
        Z = z.dimshuffle(1, 0, 'x')
        Z = TT.extra_ops.repeat(Z, NU.shape[2], 2)
        S = s.dimshuffle('x', 0, 'x')

        cdf = self._margin(NU, TT.sqr(S), Z)

        return cdf 
Example #4
	def get_output(self, train=False):

		X = self.get_input(train)

		full = T.ones_like(X)
		masks = [full]

		for i in xrange(len(self.input_shapes)):
			mask = T.ones_like(X)
			idx = 0
			for j in xrange(len(self.input_shapes)):
				if i == j:
					try:
						ndim = len(self.input_shapes[0])
					except TypeError:
						# a bare width was given instead of a shape tuple; treat it as 1-D
						ndim = 1
					if ndim == 3:
						mask = T.set_subtensor(mask[:, :, idx: idx + self.input_shapes[j]], 0)
					elif ndim == 2:
						mask = T.set_subtensor(mask[:, idx: idx + self.input_shapes[j]], 0)
					elif ndim == 1:
						mask = T.set_subtensor(mask[idx: idx + self.input_shapes[j]], 0)
					else:
						raise NotImplementedError()
				idx =  idx + self.input_shapes[j]
			masks += [mask]
		masked = T.stack(masks)

		if train:
			index  = self.trng.random_integers(size=(1,),low = 0, high = len(masks)-1)[0]
		else:
			index = 0
		masked_output = X * masked[index]
		return masked_output
Example #5
 def step_fun(self):
     if self._step_fun is None:
         inputs = T.matrix('inputs')
         states_tm1 = [T.matrix('state_%d_%d_tm1' % (layer, state))
                       for layer in range(self.n_layers)
                       for state in range(self.gate0.n_states)]
         if self.gates[-1].use_attention:
             raise NotImplementedError('Stacked RNN with attention')
             attended=T.tensor3('attended')
             attended_dot_u=T.tensor3('attended_dot_u')
             attention_mask=T.matrix('attention_mask')
             self._step_fun = function(
                     [inputs] + states_tm1 + [
                         attended, attended_dot_u, attention_mask],
                     self.step(*([inputs, T.ones(inputs.shape[:-1])] +
                                 states_tm1 + [T.ones_like(states_tm1[0]),
                                 attended, attended_dot_u,
                                 attention_mask])),
                     name='%s_step_fun'%self.name)
         else:
             self._step_fun = function(
                     [inputs] + states_tm1,
                     self.step(*([inputs, T.ones(inputs.shape[:-1])] +
                               states_tm1 + [T.ones_like(states_tm1[0])])),
                     name='%s_step_fun'%self.name)
     return self._step_fun
Example #6
    def calc_CER(self, resultseq, targetseq, resultseq_mask=None, targetseq_mask=None):
        """
        Calculate the character error rate (CER) given the ground truth 'targetseq' and the CTC decoding output 'resultseq'.
        :param resultseq: (T1, B)
        :param resultseq_mask: (T1, B)
        :param targetseq: (T2, B)
        :param targetseq_mask: (T2, B)
        :return: CER scalar
        """
        if resultseq_mask is None:
            resultseq_mask = tensor.ones_like(resultseq)
        if targetseq_mask is None:
            targetseq_mask = tensor.ones_like(targetseq)

        def step(result_seq, target_seq, result_seq_mask, target_seq_mask, TE, TG):
            L1 = tensor.cast(result_seq_mask.sum(), 'int32')
            L2 = tensor.cast(target_seq_mask.sum(), 'int32')
            d = self._editdist(result_seq[0:L1], target_seq[0:L2])
            TE += d
            TG += target_seq_mask.sum()
            return TE, TG

        outputs, updates = theano.scan(fn=step,
                                       sequences=[resultseq.T, targetseq.T, resultseq_mask.T, targetseq_mask.T],
                                       outputs_info=[tensor.zeros(1), tensor.zeros(1)],
                                       name='calc_CER')
        TE, TG = outputs[0][-1], outputs[1][-1]
        CER = TE/TG
        return CER, TE, TG
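A hypothetical usage sketch (the `model` instance and the input tensors are assumptions, not part of the snippet): when no masks are passed, the tensor.ones_like defaults mean every timestep of every sequence counts toward the edit distance.

import theano.tensor as tensor

result = tensor.imatrix('resultseq')    # (T1, B) decoded label indices
target = tensor.imatrix('targetseq')    # (T2, B) ground-truth label indices
cer, te, tg = model.calc_CER(result, target)   # masks default to tensor.ones_like(...)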
Example #7
 def f(X):
     """
     Apply hard local winner-take-all on every rows of a theano matrix.
     Parameters
     ----------
     p: theano matrix
         Matrix on whose rows LWTA will be applied.
     block_size: int
         Number of units in each block.
     """
     p = X
     batch_size = p.shape[0]
     num_filters = p.shape[1]
     num_blocks = num_filters // block_size
     w = p.reshape((batch_size, num_blocks, block_size))
     block_max = w.max(axis=2).dimshuffle(0, 1, 'x') * T.ones_like(w)
     max_mask = T.cast(w >= block_max, 'float32')
     indices = np.array(range(1, block_size + 1))
     max_mask2 = max_mask * indices
     block_max2 = max_mask2.max(axis=2).dimshuffle(
         0, 1, 'x') * T.ones_like(w)
     max_mask3 = T.cast(max_mask2 >= block_max2, 'float32')
     w2 = w * max_mask3
     w3 = w2.reshape((p.shape[0], p.shape[1]))
     return w3
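A non-symbolic NumPy reference of the same block-wise winner-take-all, for intuition only (ties are all kept here, whereas the Theano version above breaks ties toward the last unit in each block):

import numpy as np

def lwta_numpy(p, block_size):
    # keep only the per-block maximum in every row, zero the rest
    batch, num_filters = p.shape
    w = p.reshape(batch, num_filters // block_size, block_size)
    keep = (w == w.max(axis=2, keepdims=True))
    return (w * keep).reshape(batch, num_filters)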
Example #8
File: model.py Project: ivanhe/rnn
 def build_model(self):
   print '\n... building the model with unroll=%d, backroll=%d' \
     % (self.source.unroll, self.source.backroll)
   x = T.imatrix('x')
   y = T.imatrix('y')
   reset = T.scalar('reset')
   hiddens = [h['init'] for h in self.hiddens.values()]
   outputs_info = [None] * 3 + hiddens
   [losses, probs, errors, hids], updates = \
     theano.scan(self.step, sequences=[x, y], outputs_info=outputs_info)
   loss = losses.sum()
   error = errors.sum() / T.cast((T.neq(y, 255).sum()), floatX)
   hidden_updates_train = []
   hidden_updates_test = []
   for h in self.hiddens.values():
     h_train = ifelse(T.eq(reset, 0), \
       hids[-1-self.source.backroll, :], T.ones_like(h['init']))
     h_test = ifelse(T.eq(reset, 0), \
       hids[-1, :], T.ones_like(h['init']))
     hidden_updates_train.append((h['init'], h_train))
     hidden_updates_test.append((h['init'], h_test))
   updates = self.source.get_updates(loss, self.sgd_params)
   updates += hidden_updates_train
   rets = [loss, probs[-1, :], error]
   mode = theano.Mode(linker='cvm')
   train_model = theano.function([x, y, reset, self.lr], rets, \
     updates=updates, mode=mode)
   test_model = theano.function([x, y, reset], rets, \
     updates=hidden_updates_test, mode=mode)
   return train_model, test_model
Example #9
def castray(ro, rd, shape_params, nprims, width, height):
    tmin = 1.0
    tmax = 20.0
    precis = 0.002
    m = -1.0
    # Each iteration produces a sequence of distances d1, d2, ..., dn,
    # and the accumulated distances are d1, d1+d2, d1+d2+d3, ...
    # What we actually want in the output is, for each ray, the distance to the surface,
    # i.e. something like 0, 20, 25, 27, 28, 28, 28, 28, 28.

    max_num_steps = 25

    # distcolors = map(ro + rd * 0, width, height) #FIXME, reshape instead of mul by 0
    distcolors = mapedit(ro + rd * 0, shape_params, nprims, width, height)
    dists = distcolors
    steps = T.switch(dists < precis, T.zeros_like(dists), T.ones_like(dists))
    accum_dists = T.reshape(dists, (width, height, 1))

    for i in range(max_num_steps - 1):
        # distcolors = map(ro + rd * accum_dists, width, height) #FIXME, reshape instead of mul by 0
        distcolors = mapedit(ro + rd * accum_dists, shape_params, nprims, width, height) #FIXME, reshape instead of mul by 0
        dists = distcolors
        steps = steps + T.switch(dists < precis, T.zeros_like(dists), T.ones_like(dists))
        accum_dists = accum_dists + T.reshape(dists, (width, height, 1))

    last_depth = T.reshape(accum_dists, (width, height))
    depthmap = T.switch(last_depth < tmax, last_depth / tmax, T.zeros_like(last_depth))
    color = 1.0 - steps / float(max_num_steps)
    # Distance marched along ray and delta between last two steps
    return depthmap
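A hedged toy check of the step-counting idiom used above: T.switch(dists < precis, 0, 1) marks rays that are still marching, so summing it across iterations counts how many steps each ray took.

import numpy
import theano
import theano.tensor as T

d = T.fvector('d')
still_marching = T.switch(d < 0.002, T.zeros_like(d), T.ones_like(d))
f = theano.function([d], still_marching)
print(f(numpy.asarray([0.001, 0.5], dtype='float32')))   # [0. 1.]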
Example #10
        def forward_prop_step(x_t, s_t1_prev, s_t2_prev):
            ''' Inner function encapsulating a propagation step
            This is how we calculated the hidden state in a simple RNN. No longer!
            s_t = T.tanh(U[:,x_t] + W.dot(s_t1_prev))
            '''
            # Word embedding layer
            x_e = E[:,x_t]
            
            # GRU Layer 1
            z_t1 = T.nnet.hard_sigmoid(U[0].dot(x_e) + W[0].dot(s_t1_prev) + b[0])
            r_t1 = T.nnet.hard_sigmoid(U[1].dot(x_e) + W[1].dot(s_t1_prev) + b[1])
            c_t1 = T.tanh(U[2].dot(x_e) + W[2].dot(s_t1_prev * r_t1) + b[2])
            s_t1 = (T.ones_like(z_t1) - z_t1) * c_t1 + z_t1 * s_t1_prev
            
            # GRU Layer 2
            z_t2 = T.nnet.hard_sigmoid(U[3].dot(s_t1) + W[3].dot(s_t2_prev) + b[3])
            r_t2 = T.nnet.hard_sigmoid(U[4].dot(s_t1) + W[4].dot(s_t2_prev) + b[4])
            c_t2 = T.tanh(U[5].dot(s_t1) + W[5].dot(s_t2_prev * r_t2) + b[5])
            s_t2 = (T.ones_like(z_t2) - z_t2) * c_t2 + z_t2 * s_t2_prev
            
            # Final output calculation
            # Theano's softmax returns a matrix with one row, we only need the row
            o_t = T.nnet.softmax(V.dot(s_t2) + c)[0]

            return [o_t, s_t1, s_t2]
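A hypothetical wiring sketch (E, U, W, b, V, c and hidden_dim are assumed to exist as shared variables and constants with matching shapes): theano.scan threads the two hidden states through forward_prop_step while the softmax output is collected at every position.

x = T.ivector('x')                       # sequence of word indices
s0 = T.zeros(hidden_dim)
[o, s1, s2], updates = theano.scan(
    forward_prop_step,
    sequences=x,
    outputs_info=[None, s0, s0])
prediction = T.argmax(o, axis=1)         # most likely word at every position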
Example #11
 def _build_marginal_likelihood_logp(self, y, X, Xu, sigma):
     sigma2 = tt.square(sigma)
     Kuu = self.cov_func(Xu)
     Kuf = self.cov_func(Xu, X)
     Luu = cholesky(stabilize(Kuu))
     A = solve_lower(Luu, Kuf)
     Qffd = tt.sum(A * A, 0)
     if self.approx == "FITC":
         Kffd = self.cov_func(X, diag=True)
         Lamd = tt.clip(Kffd - Qffd, 0.0, np.inf) + sigma2
         trace = 0.0
     elif self.approx == "VFE":
         Lamd = tt.ones_like(Qffd) * sigma2
         trace = ((1.0 / (2.0 * sigma2)) *
                  (tt.sum(self.cov_func(X, diag=True)) -
                   tt.sum(tt.sum(A * A, 0))))
     else:  # DTC
         Lamd = tt.ones_like(Qffd) * sigma2
         trace = 0.0
     A_l = A / Lamd
     L_B = cholesky(tt.eye(Xu.shape[0]) + tt.dot(A_l, tt.transpose(A)))
     r = y - self.mean_func(X)
     r_l = r / Lamd
     c = solve_lower(L_B, tt.dot(A, r_l))
     constant = 0.5 * X.shape[0] * tt.log(2.0 * np.pi)
     logdet = 0.5 * tt.sum(tt.log(Lamd)) + tt.sum(tt.log(tt.diag(L_B)))
     quadratic = 0.5 * (tt.dot(r, r_l) - tt.dot(c, c))
     return -1.0 * (constant + logdet + quadratic + trace)
Example #12
        def forward_prop_step(x_t, dropmask_t, s_1_prev, s_2_prev):

            # Word embedding layer
            x_e = E.dot(x_t.T)
            x_e = x_e.astype(theano.config.floatX)


            drop_mask = T.ones_like(U_update[0].astype(theano.config.floatX),dtype=theano.config.floatX)
            if regularization_type == RegularizationType.DROP_CONNECT:
                drop_mask = dropmask_t




            # GRU Layer 1
            update_gate_1 = T.nnet.hard_sigmoid((drop_mask * U_update[0]).dot(x_e) + W_update[0].dot(s_1_prev) + b_update[0])
            reset_gate_1 = T.nnet.hard_sigmoid((drop_mask * U_reset[0]).dot(x_e) + W_reset[0].dot(s_1_prev) + b_reset[0])
            c_1 = T.tanh((drop_mask * U_candidate[0]).dot(x_e) + W_candidate[0].dot(s_1_prev * reset_gate_1) + b_candidate[0])
            s_1 = (T.ones_like(update_gate_1) - update_gate_1) * c_1 + update_gate_1 * s_1_prev

            # GRU Layer 2
            update_gate_2 = T.nnet.hard_sigmoid((drop_mask * U_update[0]).dot(s_1) + W_update[0].dot(s_2_prev) + b_update[0])
            reset_gate_2 = T.nnet.hard_sigmoid((drop_mask * U_reset[0]).dot(s_1) + W_reset[0].dot(s_2_prev) + b_reset[0])
            c_2 = T.tanh((drop_mask * U_candidate[0]).dot(s_1) + W_candidate[0].dot(s_2_prev * reset_gate_2) + b_candidate[0])
            s_2 = (T.ones_like(update_gate_2) - update_gate_2) * c_2 + update_gate_2 * s_2_prev

            # Final output calculation
            # Theano's softmax returns a matrix with one row, we only need the row
            o_t = T.nnet.softmax(V.dot(s_2) + output_bias)[0]

            return [o_t, s_1, s_2]
Example #13
    def __init__(self, optimizer_params, model_obj=None, X=None, Y=None, Y_aux=[], top_loss=None, params=None):
        print "Compiling RPROP..."
        super(compileRPROP, self).__init__(model_obj, X, Y, Y_aux, top_loss, params)

        self.LRs = []
        RPROP_updates = []

        # Initialise shared variables for the Training algos
        for i, para in enumerate(self.params):
            if para in self.params[:i]:
                print "Detected RNN or shared param @index =", i
            else:
                self.LRs.append(
                    theano.shared(
                        np.float32(optimizer_params["initial_update_size"])
                        * np.ones(para.get_value().shape, dtype="float32"),
                        name=para.name + str("_RPROP"),
                        borrow=0,
                    )
                )

        print "RPROP: missing backtracking handling "  ###TODO ???
        for param_i, grad_i, last_grad_i, pLR_i in zip(self.params, self.gradients, self.last_grads, self.LRs):
            # The commented code on the next 4 lines cannot be expressed in Theano directly; it only illustrates the rule:
            # if   ((last_grad_i*grad_i) < -1e-9): # sign change & significant magnitude of the last two gradients
            #   pLR_i_new = pLR_i * (1 - np.float32(RPROP_penalty)) # decrease this LR
            # elif ((last_grad_i*grad_i) > 1e-11): # no sign change & the last two gradients were sufficiently big
            #   pLR_i_new = pLR_i * (1 + np.float32(RPROP_gain))    # increase this LR

            # capping RPROP-LR inside [1e-7,2e-3]
            RPROP_updates.append(
                (
                    pLR_i,
                    T.minimum(
                        T.maximum(
                            pLR_i
                            * (
                                1
                                - np.float32(optimizer_params["penalty"]) * ((last_grad_i * grad_i) < -1e-9)
                                + np.float32(optimizer_params["gain"]) * ((last_grad_i * grad_i) > 1e-11)
                            ),
                            1e-7 * T.ones_like(pLR_i),
                        ),
                        2e-3 * T.ones_like(pLR_i),
                    ),
                )
            )
            RPROP_updates.append(
                (param_i, param_i - pLR_i * grad_i / (T.abs_(grad_i) + 1e-6) - (self.weightdecay * param_i))
            )
            RPROP_updates.append((last_grad_i, grad_i))

        self.step = theano.function(
            [self.X, self.Y] + self.Y_aux,
            [self.top_loss, self.loss_instance],
            updates=RPROP_updates,
            on_unused_input="warn",
        )
        print " Compiling done  - in %.3f s!" % (time.time() - self.t_init)
Example #14
 def __init__(self, gtype, alfa=0.02, ifreset=False, countmax=100):
     self._alfa = alfa
     self._gradsum = T.ones_like(gtype)
     self._gradsum_init = T.ones_like(gtype)
     # parameters for resetting _grad_sum
     self._ifreset = ifreset
     self._counter = 0
     self._countmax = countmax
Example #15
    def sample(self, alpha, beta):
        z_1 = super(BetaSample,
                    self).sample(alpha, T.ones_like(alpha))

        z_2 = super(BetaSample,
                    self).sample(beta, T.ones_like(beta))

        return z_1 / (z_1 + z_2)
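A hedged numerical check of the identity this relies on, assuming the parent sample(a, b) draws Gamma variates with the second parameter fixed at one by T.ones_like: if z1 ~ Gamma(alpha, 1) and z2 ~ Gamma(beta, 1), then z1 / (z1 + z2) ~ Beta(alpha, beta).

import numpy as np

rng = np.random.RandomState(0)
z1 = rng.gamma(2.0, 1.0, size=100000)
z2 = rng.gamma(5.0, 1.0, size=100000)
print((z1 / (z1 + z2)).mean())   # about 0.286, the Beta(2, 5) mean 2 / (2 + 5)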
Example #16
def adaptive_mask(mask, black=0., ignore=0.5, white=1.):
    bw = ignore * T.ones_like(mask, dtype=floatX)
    t_black = black*T.ones_like(bw, dtype=floatX)
    t_white = white*T.ones_like(bw, dtype=floatX)
    white_idx = (mask > MASK["IGNORE"]).nonzero()
    black_idx = (mask < MASK["BACKGROUND_RING"]).nonzero()
    bw = T.set_subtensor(bw[white_idx], t_white[white_idx])
    bw = T.set_subtensor(bw[black_idx], t_black[black_idx])
    return bw
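A hypothetical usage sketch (floatX and the MASK thresholds come from the surrounding module): pixels above MASK["IGNORE"] map to the white value, pixels below MASK["BACKGROUND_RING"] map to the black value, and everything else keeps the ignore value.

m = T.fmatrix('m')
f = theano.function([m], adaptive_mask(m, black=0., ignore=0.5, white=1.))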
Example #17
    def get_model(self,X, Y, x_test):
        '''
        Gaussian Process Regression model.
        Reference: C.E. Rasmussen, "Gaussian Processes for Machine Learning", MIT Press, 2006

        Args:
            X: tensor matrix, training data
            Y: tensor matrix, training target
            x_test: tensor matrix, testing data
        
        Returns:
            K: prior cov matrix
            Ks: prior joint cov matrix
            Kss: prior cov matrix for testing data
            Posterior Distribution:
                alpha: alpha = inv(K)*(mu-m)
                sW: vector containing diagonal of sqrt(W)
                L: L = chol(sW*K*sW+eye(n))
            y_test_mu: predictive mean
            y_test_var: predictive variance
            fs2: predictive latent variance
        Note: the cov matrix inverse is computed through Cholesky factorization
        https://makarandtapaswi.wordpress.com/2011/07/08/cholesky-decomposition-for-matrix-inversion/
        '''
        # Compute GP prior distribution: mean and covariance matrices (eq 2.13, 2.14)
        K = self.covFunc(X,X,'K') # prior cov
        #m = T.mean(Y)*T.ones_like(Y) # prior mean
        m = self.mean*T.ones_like(Y) # prior mean

        # Compute GP joint prior distribution between training and test (eq 2.18)
        Ks = self.covFunc(X,x_test,'Ks')
        # Note: this is the self-covariance matrix of the test points.
        Kss = self.covFunc(x_test,x_test,'Kss',mode='self_test')

        # Compute posterior distribution with noise: L,alpha,sW,and log_likelihood.
        sn2 = T.exp(2*self.sigma_n) # noise variance of likGauss
        L = sT.cholesky(K/sn2 + T.identity_like(K))
        sl = sn2
        alpha = T.dot(sT.matrix_inverse(L.T), 
                      T.dot(sT.matrix_inverse(L), (Y-m)) ) / sl
        sW = T.ones_like(T.sum(K,axis=1)).reshape((K.shape[0],1)) / T.sqrt(sl)
        log_likelihood = T.sum(-0.5 * (T.dot((Y-m).T, alpha)) - T.sum(T.log(T.diag(L))) - X.shape[0] / 2 * T.log(2.*np.pi*sl))
        
        
        # Compute predictive distribution using the computed posterior distribution.
        fmu = m + T.dot(Ks.T, alpha) # Prediction Mu fs|f, eq 2.25 
        V = T.dot(sT.matrix_inverse(L),T.extra_ops.repeat(sW,x_test.shape[0],axis=1)*Ks)
        fs2 = Kss - (T.sum(V*V,axis=0)).reshape((1,V.shape[1])).T # Predictive sigma, eq 2.26
        fs2 = T.maximum(fs2,0) # remove negative variance noise
        #fs2 = T.sum(fs2,axis=1) # in case x has multiple dimensions

        y_test_mu = fmu
        y_test_var = fs2 + sn2

        return K, Ks, Kss, y_test_mu, y_test_var, log_likelihood, L, alpha,V, fs2,sW
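A hedged numeric check of the Cholesky-based solve the posterior uses: alpha = K^{-1}(Y - m) is computed as L^{-T}(L^{-1}(Y - m)) with K = L L^T, avoiding an explicit matrix inverse.

import numpy as np

K = np.array([[2.0, 0.5], [0.5, 1.0]])
y = np.array([1.0, -1.0])
L = np.linalg.cholesky(K)
alpha = np.linalg.solve(L.T, np.linalg.solve(L, y))
print(np.allclose(K.dot(alpha), y))   # True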
Example #18
 def _alignData(self, w_t, sv_tm1):
     # padding dummy element 
     mask = T.concatenate([T.ones_like(sv_tm1),
         T.ones_like(sv_tm1[:,-1:])],axis=1)
     # iterate over batch 
     mask,_ = theano.scan(fn=self._batchAlign,
             sequences=[w_t,mask],
             outputs_info=None)
     # mask the slot-value vector
     sv_t = mask[:,:-1] * sv_tm1
     return sv_t
Example #19
        def _step(m_, x_, h_):
            preact = T.dot(h_, self.U)
            preact += x_

            z = T.nnet.sigmoid(_slice(preact, 0, hidden_size))
            r = T.nnet.sigmoid(_slice(preact, 1, hidden_size))
            c = T.tanh(_slice(preact, 2, hidden_size) * r + (T.ones_like(r) - r) * _slice(x_, 2, hidden_size))

            h = (T.ones_like(z) - z) * c + z * h_
            h = m_[:, None] * h + (1. - m_)[:, None] * h_

            return h
Example #20
 def gibbs_iteration(g1, s1, h1, t1, v):
     if mean_field:
         g2 = self.g_given_htv(h1, t1, v) 
         s2 = self.s_given_ghtv(T.ones_like(g2), h1, t1, v)
         h2 = self.h_given_gsv(g2, s2, v)
         t2 = self.t_given_gshv(g2, s2, T.ones_like(h2), v)
     else:
         g2 = self.sample_g_given_htv(h1, t1, v) 
         s2 = self.sample_s_given_ghtv(g2, h1, t1, v)
         h2 = self.sample_h_given_gsv(g2, s2, v)
         t2 = self.sample_t_given_gshv(g2, s2, h2, v)
     return [g2, s2, h2, t2]
Example #21
def test_gpualloc_input_on_gpu():
    a_val = numpy.asarray(numpy.random.rand(4,5),dtype='float32')
    a = tcn.shared_constructor(a_val)

    b = T.fscalar()
    f = theano.function([b], T.ones_like(a)+b, mode=mode_without_gpu)
    f_gpu = theano.function([b], T.ones_like(a)+b, mode=mode_with_gpu)

    assert sum([node.op == T.alloc for node in f.maker.env.toposort()])==1
    assert sum([node.op == B.gpu_alloc for node in f_gpu.maker.env.toposort()])==1

    assert numpy.allclose(numpy.ones(a.get_value(borrow=True).shape)+9,f_gpu(9))
    assert numpy.allclose(f(5),f_gpu(5))
Example #22
 def forward_prop_step(x_t, s_t1_prev, s_t2_prev):
     x_e = E[:, x_t]
     #L1
     z_t1 = T.nnet.hard_sigmoid(U[0].dot(x_e) + W[0].dot(s_t1_prev) + b[0])
     r_t1 = T.nnet.hard_sigmoid(U[1].dot(x_e) + W[1].dot(s_t1_prev) + b[1])
     c_t1 = T.tanh(U[2].dot(x_e) + W[2].dot(s_t1_prev * r_t1) + b[2])
     s_t1 = (T.ones_like(z_t1) - z_t1) * c_t1 + z_t1 * s_t1_prev
     # L2
     z_t2 = T.nnet.hard_sigmoid(U[3].dot(s_t1) + W[3].dot(s_t2_prev) + b[3])
     r_t2 = T.nnet.hard_sigmoid(U[4].dot(s_t1) + W[4].dot(s_t2_prev) + b[4])
     c_t2 = T.tanh(U[5].dot(s_t1) + W[5].dot(s_t2_prev * r_t2) + b[5])
     s_t2 = (T.ones_like(z_t2) - z_t2) * c_t2 + z_t2 * s_t2_prev
     o_t = T.nnet.softmax(V.dot(s_t2) + c)[0]
     return [o_t, s_t1, s_t2]
Example #23
File: gan.py Project: nebw/beras
def gan_binary_crossentropy(d_out_given_fake_for_gen,
                            d_out_given_fake_for_dis,
                            d_out_given_real):
    d_loss_fake = binary_crossentropy(
        T.zeros_like(d_out_given_fake_for_dis),
        d_out_given_fake_for_dis).mean()
    d_loss_real = binary_crossentropy(
        T.ones_like(d_out_given_real),
        d_out_given_real).mean()
    d_loss = d_loss_real + d_loss_fake
    g_loss = binary_crossentropy(
        T.ones_like(d_out_given_fake_for_gen),
        d_out_given_fake_for_gen).mean()
    return g_loss, d_loss, d_loss_real, d_loss_fake
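A hedged sketch of the same ones_like/zeros_like label trick written with Theano's own binary_crossentropy; note that its argument order is (output, target), the reverse of the Keras-style helper used above.

d_real = T.fvector('d_real')   # discriminator output on real samples, in (0, 1)
d_fake = T.fvector('d_fake')   # discriminator output on generated samples
d_loss = (T.nnet.binary_crossentropy(d_real, T.ones_like(d_real)).mean() +
          T.nnet.binary_crossentropy(d_fake, T.zeros_like(d_fake)).mean())
g_loss = T.nnet.binary_crossentropy(d_fake, T.ones_like(d_fake)).mean()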
Example #24
    def build_loss(deterministic):
        # this currently has the problem that these 3 expressions come from 3 different
        # get_output calls, so they won't return the same mask if dropout or other
        # noise is used. Currently not using dropout so not a problem.
        ae = nn.layers.get_output(ldict['ae_out'], deterministic=deterministic)
        disc_real = nn.layers.get_output(ldict['disc_out'], deterministic=deterministic)
        disc_fake = nn.layers.get_output(ldict['disc_out'], { ldict['disc_in']:ae },
                deterministic=deterministic)

        d_cost_real=nn.objectives.binary_crossentropy(disc_real, T.ones_like(disc_real)).mean()
        d_cost_fake=nn.objectives.binary_crossentropy(disc_fake, T.zeros_like(disc_fake)).mean()
        g_cost=nn.objectives.binary_crossentropy(disc_fake, T.ones_like(disc_fake)).mean()
        d_cost = d_cost_real + d_cost_fake
        mse = nn.objectives.squared_error(ae, X).mean()
        return g_cost, d_cost, mse
Example #25
    def test_scan_err1(self):
        # This test should fail when building fx for the first time
        orig_compute_test_value = theano.config.compute_test_value
        try:
            theano.config.compute_test_value = 'raise'

            k = T.iscalar("k")
            A = T.matrix("A")
            k.tag.test_value = 3
            A.tag.test_value = numpy.random.rand(5,3).astype(config.floatX)

            def fx(prior_result, A):
                return T.dot(prior_result, A)

            # Since we have to inspect the traceback,
            # we cannot simply use self.assertRaises()
            try:
                theano.scan(
                        fn=fx,
                        outputs_info=T.ones_like(A),
                        non_sequences=A,
                        n_steps=k)
                assert False
            except ValueError, e:
                # Get traceback
                tb = sys.exc_info()[2]
                # Get frame info 4 layers up
                frame_info = traceback.extract_tb(tb)[-5]
                # We should be in the "fx" function defined above
                assert os.path.split(frame_info[0])[1] == 'test_compute_test_value.py'
                assert frame_info[2] == 'fx'

        finally:
            theano.config.compute_test_value = orig_compute_test_value
Example #26
    def get_constraint_updates(self):
        constraint_updates = OrderedDict() 

        if self.flags['wv_norm'] == 'unit':
            constraint_updates[self.Wv] = self.Wv / self.norm_wv
        elif self.flags['wv_norm'] == 'max_unit':
            constraint_updates[self.Wv] = self.Wv / self.norm_wv * T.minimum(self.norm_wv, 1.0)

        if self.flags['scalar_lambd']:
            constraint_updates[self.lambd] = T.mean(self.lambd) * T.ones_like(self.lambd)

        ## Enforce sparsity pattern on g if required ##
        if self.sparse_gmask:
            constraint_updates[self.Wg] = self.Wg * self.sparse_gmask.mask.T

        ## clip parameters to maximum values (if applicable)
        for (k,v) in self.clip_max.iteritems():
            assert k in [param.name for param in self.params()]
            param = constraint_updates.get(k, getattr(self, k))
            constraint_updates[param] = T.clip(param, param, v)

        ## clip parameters to minimum values (if applicable)
        for (k,v) in self.clip_min.iteritems():
            assert k in [param.name for param in self.params()]
            param = constraint_updates.get(k, getattr(self, k))
            constraint_updates[param] = T.clip(constraint_updates.get(param, param), v, param)

        return constraint_updates
Example #27
    def compile(self, optimizer, loss, class_mode="categorical", theano_mode=None):
        self.optimizer = optimizers.get(optimizer)

        self.loss = objectives.get(loss)
        weighted_loss = weighted_objective(objectives.get(loss))

        # input of model
        self.X_train = self.get_input(train=True)
        self.X_test = self.get_input(train=False)

        self.y_train = self.get_output(train=True)
        self.y_test = self.get_output(train=False)

        # target of model
        self.y = T.zeros_like(self.y_train)

        self.weights = T.ones_like(self.y_train)

        train_loss = weighted_loss(self.y, self.y_train, self.weights)
        test_loss = weighted_loss(self.y, self.y_test, self.weights)

        train_loss.name = 'train_loss'
        test_loss.name = 'test_loss'
        self.y.name = 'y'

        if class_mode == "categorical":
            train_accuracy = T.mean(T.eq(T.argmax(self.y, axis=-1), T.argmax(self.y_train, axis=-1)))
            test_accuracy = T.mean(T.eq(T.argmax(self.y, axis=-1), T.argmax(self.y_test, axis=-1)))

        elif class_mode == "binary":
            train_accuracy = T.mean(T.eq(self.y, T.round(self.y_train)))
            test_accuracy = T.mean(T.eq(self.y, T.round(self.y_test)))
        else:
            raise Exception("Invalid class mode:" + str(class_mode))
        self.class_mode = class_mode
        self.theano_mode = theano_mode

        for r in self.regularizers:
            train_loss = r(train_loss)
        updates = self.optimizer.get_updates(self.params, self.constraints, train_loss)

        if type(self.X_train) == list:
            train_ins = self.X_train + [self.y, self.weights]
            test_ins = self.X_test + [self.y, self.weights]
            predict_ins = self.X_test
        else:
            train_ins = [self.X_train, self.y, self.weights]
            test_ins = [self.X_test, self.y, self.weights]
            predict_ins = [self.X_test]

        self._train = theano.function(train_ins, train_loss,
            updates=updates, allow_input_downcast=True, mode=theano_mode)
        self._train_with_acc = theano.function(train_ins, [train_loss, train_accuracy],
            updates=updates, allow_input_downcast=True, mode=theano_mode)
        self._predict = theano.function(predict_ins, self.y_test,
            allow_input_downcast=True, mode=theano_mode)
        self._test = theano.function(test_ins, test_loss,
            allow_input_downcast=True, mode=theano_mode)
        self._test_with_acc = theano.function(test_ins, [test_loss, test_accuracy],
            allow_input_downcast=True, mode=theano_mode)
Example #28
 def _build_conditional(self, Xnew, pred_noise, diag, X, Xu, y, sigma, cov_total, mean_total):
     sigma2 = tt.square(sigma)
     Kuu = cov_total(Xu)
     Kuf = cov_total(Xu, X)
     Luu = cholesky(stabilize(Kuu))
     A = solve_lower(Luu, Kuf)
     Qffd = tt.sum(A * A, 0)
     if self.approx == "FITC":
         Kffd = cov_total(X, diag=True)
         Lamd = tt.clip(Kffd - Qffd, 0.0, np.inf) + sigma2
     else:  # VFE or DTC
         Lamd = tt.ones_like(Qffd) * sigma2
     A_l = A / Lamd
     L_B = cholesky(tt.eye(Xu.shape[0]) + tt.dot(A_l, tt.transpose(A)))
     r = y - mean_total(X)
     r_l = r / Lamd
     c = solve_lower(L_B, tt.dot(A, r_l))
     Kus = self.cov_func(Xu, Xnew)
     As = solve_lower(Luu, Kus)
     mu = self.mean_func(Xnew) + tt.dot(tt.transpose(As), solve_upper(tt.transpose(L_B), c))
     C = solve_lower(L_B, As)
     if diag:
         Kss = self.cov_func(Xnew, diag=True)
         var = Kss - tt.sum(tt.square(As), 0) + tt.sum(tt.square(C), 0)
         if pred_noise:
             var += sigma2
         return mu, var
     else:
         cov = (self.cov_func(Xnew) - tt.dot(tt.transpose(As), As) +
                tt.dot(tt.transpose(C), C))
         if pred_noise:
             cov += sigma2 * tt.identity_like(cov)
         return mu, stabilize(cov)
Example #29
    def test_scan(self):
        """
        Test the compute_test_value mechanism with Scan.
        """
        orig_compute_test_value = theano.config.compute_test_value
        try:
            theano.config.compute_test_value = 'raise'
            #theano.config.compute_test_value = 'warn'
            k = T.iscalar("k")
            A = T.vector("A")
            k.tag.test_value = 3
            A.tag.test_value = numpy.random.rand(5).astype(config.floatX)

            def fx(prior_result, A):
                return prior_result * A
            # Symbolic description of the result
            result, updates = theano.scan(fn=fx,
                                          outputs_info=T.ones_like(A),
                                          non_sequences=A,
                                          n_steps=k)

            # We only care about A**k, but scan has provided us with A**1 through A**k.
            # Discard the values that we don't care about. Scan is smart enough to
            # notice this and not waste memory saving them.
            final_result = result[-1]
            assert hasattr(final_result.tag, 'test_value')
        finally:
            theano.config.compute_test_value = orig_compute_test_value
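A minimal standalone sketch of the same pattern: starting the recurrence from T.ones_like(A) makes result[-1] the elementwise power A**k.

import numpy
import theano
import theano.tensor as T

k = T.iscalar('k')
A = T.vector('A')
result, _ = theano.scan(lambda prior, A: prior * A,
                        outputs_info=T.ones_like(A),
                        non_sequences=A, n_steps=k)
power = theano.function([A, k], result[-1])
print(power(numpy.arange(4, dtype=theano.config.floatX), 3))   # [ 0.  1.  8. 27.]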
Example #30
 def one_step(x_t, s_t_prev, U, V, W):
     z = T.nnet.hard_sigmoid(T.dot(U[0], x_t) + T.dot(s_t_prev, W[0]))
     r = T.nnet.hard_sigmoid(T.dot(U[1], x_t) + T.dot(s_t_prev, W[1]))
     h = T.tanh(T.dot(U[2], x_t) + T.dot((s_t_prev * r), W[2]))
     s_t = (T.ones_like(z) - z) * h + z * s_t_prev
     o_t = T.nnet.softmax(T.dot(V, s_t))
     return o_t[0], s_t
Example #31
    def apply(self, inputs, states, cells, location, scale, alpha, mask=None):
        def slice_last(x, no):
            return x[:, no * self.lstm_dim:(no + 1) * self.lstm_dim]

        tanh = self.children[1].apply
        cropper = self.children[0]

        # inputs shape:  B x C x X x Y
        # outputs shape: B x C' x X' x Y'
        conved_part_1 = self.apply_conv(
            inputs,
            conv_layers=self.conv_layers[0:self.num_layers_first_half_of_conv])

        # inputs shape:  B x C x X x Y
        # outputs shape: B x 1 x X x Y
        # if self.num_channels == 3:
        #     gray_scale_inputs = self.rgb2gray(inputs)
        # else:
        gray_scale_inputs = inputs

        # inputs shape:  B x 1 x X x Y
        # outputs shape: B x 1 x X' x Y'
        downn_sampled_inputs = self.down_sampler(gray_scale_inputs)

        # shape: B x F
        flat_downn_sampled_inputs = downn_sampled_inputs.flatten(ndim=2)

        # inputs shape:  B x F'
        # outputs shape: B x 3
        mlp_output = self.apply_attention_mlp(
            tensor.concatenate([
                flat_downn_sampled_inputs, 0.00001 * location, 0.00001 * scale,
                0.00001 * alpha, states
            ],
                               axis=1))
        location = mlp_output[:, 0:2]
        location.name = 'location'
        scale = mlp_output[:, 2:3]
        scale.name = 'scale'
        alpha = mlp_output[:, 3:]
        alpha.name = 'alpha'

        scale2d = tensor.concatenate([scale, scale], axis=1)
        alpha2d = tensor.concatenate([alpha, alpha], axis=1)

        # inputs shape:  B x C' x X' x Y'
        # outputs shape: B x C' x X'' x Y''
        loc_to_cropper = ((location + tensor.ones_like(location)) * np.array([
            self.cropper_input_shape[0] * 0.4,
            self.cropper_input_shape[1] * 0.4
        ]).astype('float32') + np.array([
            self.cropper_input_shape[0] * 0.1,
            self.cropper_input_shape[1] * 0.1
        ]).astype('float32'))

        scale_to_cropper = ((scale2d + tensor.ones_like(scale2d)) * np.array([
            (1.1 - self.min_scale[0]) / 2.0, (1.1 - self.min_scale[1]) / 2.0
        ]).astype('float32') + np.array(self.min_scale).astype('float32'))

        alpha_to_cropper = ((alpha2d + tensor.ones_like(alpha2d)) * np.array(
            [0.98 / 2.0 + 0.001, 0.98 / 2.0 + 0.001]).astype('float32'))

        patch, _, _ = cropper.apply(
            conved_part_1,
            np.array([list(self.cropper_input_shape)]),
            # 0.00001 * loc_to_cropper + locs,
            # 0.00001 * scale_to_cropper + 1.0 * tensor.ones_like(scale_to_cropper),
            # 0.00001 * alpha_to_cropper + 0.001 * tensor.ones_like(scale_to_cropper))
            loc_to_cropper,
            scale_to_cropper,
            alpha_to_cropper)
        patch.name = 'patch'

        conved_part_2 = self.apply_conv(
            patch,
            conv_layers=self.conv_layers[self.num_layers_first_half_of_conv:])
        flat_conved_part_2 = conved_part_2.flatten(2)

        pre_lstm = self.apply_fc(flat_conved_part_2)
        pre_lstm = tensor.concatenate([pre_lstm, location, scale, alpha],
                                      axis=1)
        transformed_pre_lstm = tensor.dot(pre_lstm,
                                          self.W_pre_lstm) + self.b_pre_lstm

        activation = tensor.dot(states, self.W_state) + transformed_pre_lstm
        in_gate = tensor.nnet.sigmoid(slice_last(activation, 0))
        forget_gate_input = slice_last(activation, 1)
        forget_gate = tensor.nnet.sigmoid(forget_gate_input +
                                          tensor.ones_like(forget_gate_input))
        next_cells = (forget_gate * cells +
                      in_gate * tanh(slice_last(activation, 2)))
        out_gate = tensor.nnet.sigmoid(slice_last(activation, 3))
        next_states = out_gate * tanh(next_cells)

        if mask:
            next_states = (mask[:, None] * next_states +
                           (1 - mask[:, None]) * states)
            next_cells = (mask[:, None] * next_cells +
                          (1 - mask[:, None]) * cells)

        return (next_states, next_cells, location, scale, alpha, patch,
                downn_sampled_inputs, conved_part_1, conved_part_2, pre_lstm)
Example #32
 def get_output_for(self, input, **kwargs):
     return T.ones_like(input) * self.constant
Example #33
                                                       t_worst * 1000)

    #-------------------------------------------------------------------------

    i = T.arange(100)
    A = theano.shared(np.random.normal(size=(10, 10)))

    def fn1(seq, acc):
        return T.dot(acc, A)

    print "-" * 78
    print "Unrolled SCAN:"
    outputs, updates = unrolled_scan(fn1,
                                     name='fn1',
                                     sequences=[i],
                                     outputs_info=[T.ones_like(A)],
                                     unroll=10)
    f_fn1 = theano.function([], outputs[-1], name='fn1')

    res = f_fn1()
    print res.shape
    print res
    benchmark(f_fn1)

    print "-" * 78
    print "Normal SCAN:"
    outputs, updates = theano.scan(fn1,
                                   name='fn1',
                                   sequences=[i],
                                   outputs_info=[T.ones_like(A)])
    f_fn1 = theano.function([], outputs[-1], name='fn1')
Example #34
    def __init__(self, rng=None,
            x_in=None, x_mask=None, x_out=None, \
            p_zi_given_xi=None, \
            p_sip1_given_zi=None, \
            q_zi_given_xi=None, \
            params=None, \
            shared_param_dicts=None):
        # setup a rng for this GIPair
        self.rng = RandStream(rng.randint(100000))

        # grab the user-provided parameters
        self.params = params
        self.x_dim = self.params['x_dim']
        self.z_dim = self.params['z_dim']
        self.imp_steps = self.params['imp_steps']
        self.step_type = self.params['step_type']
        self.x_type = self.params['x_type']
        assert ((self.x_type == 'bernoulli') or (self.x_type == 'gaussian'))
        self.shared_param_dicts = shared_param_dicts

        # grab handles to the relevant InfNets
        self.p_zi_given_xi = p_zi_given_xi
        self.p_sip1_given_zi = p_sip1_given_zi
        self.q_zi_given_xi = q_zi_given_xi

        # record the symbolic variables that will provide inputs to the
        # computation graph created to describe this MultiStageModel
        self.x_in = x_in
        self.x_out = x_out
        self.x_mask = x_mask
        self.zi_zmuv = T.tensor3()

        # setup switching variable for changing between sampling/training
        zero_ary = to_fX(np.zeros((1, )))
        self.train_switch = theano.shared(value=zero_ary,
                                          name='msm_train_switch')
        self.set_train_switch(1.0)

        if self.shared_param_dicts is None:
            # initialize parameters "owned" by this model
            s0_init = to_fX(np.zeros((self.x_dim, )))
            init_ary = to_fX(np.zeros((self.x_dim, )))
            self.x_null = theano.shared(value=init_ary, name='gpis_xn')
            self.grad_null = theano.shared(value=init_ary, name='gpsi_gn')
            self.s0 = theano.shared(value=s0_init, name='gpsi_s0')
            self.obs_logvar = theano.shared(value=zero_ary,
                                            name='gpsi_obs_logvar')
            self.bounded_logvar = 8.0 * T.tanh(
                (1.0 / 8.0) * self.obs_logvar[0])
            self.shared_param_dicts = {}
            self.shared_param_dicts['x_null'] = self.x_null
            self.shared_param_dicts['grad_null'] = self.grad_null
            self.shared_param_dicts['s0'] = self.s0
            self.shared_param_dicts['obs_logvar'] = self.obs_logvar
        else:
            # grab the parameters required by this model from a given dict
            self.x_null = self.shared_param_dicts['x_null']
            self.grad_null = self.shared_param_dicts['grad_null']
            self.s0 = self.shared_param_dicts['s0']
            self.obs_logvar = self.shared_param_dicts['obs_logvar']
            self.bounded_logvar = 8.0 * T.tanh(
                (1.0 / 8.0) * self.obs_logvar[0])

        ##################################################
        # Setup the iterative imputation loop using scan #
        ##################################################
        self.ones_mask = T.ones_like(self.x_mask)

        def imp_step_func(zi_zmuv, si):
            si_as_x = self._si_as_x(si)
            xi_unmasked = self.x_out
            xi_masked = (self.x_mask * xi_unmasked) + \
                        ((1.0 - self.x_mask) * si_as_x)
            grad_unmasked = self.x_out - si_as_x
            grad_masked = self.x_mask * grad_unmasked
            # get samples of next zi, according to the global policy
            zi_p_mean, zi_p_logvar = self.p_zi_given_xi.apply(xi_masked)
            zi_p = zi_p_mean + (T.exp(0.5 * zi_p_logvar) * zi_zmuv)
            # get samples of next zi, according to the guide policy
            zi_q_mean, zi_q_logvar = self.q_zi_given_xi.apply(
                T.concatenate([xi_masked, xi_unmasked], axis=1))
            zi_q = zi_q_mean + (T.exp(0.5 * zi_q_logvar) * zi_zmuv)

            # make zi samples that can be switched between zi_p and zi_q
            zi = ((self.train_switch[0] * zi_q) + \
                 ((1.0 - self.train_switch[0]) * zi_p))
            # compute relevant KLds for this step
            kldi_q2p = gaussian_kld(zi_q_mean, zi_q_logvar, zi_p_mean,
                                    zi_p_logvar)  # KL(q || p)
            kldi_p2q = gaussian_kld(zi_p_mean, zi_p_logvar, zi_q_mean,
                                    zi_q_logvar)  # KL(p || q)
            kldi_p2g = gaussian_kld(zi_p_mean, zi_p_logvar, 0.0,
                                    0.0)  # KL(p || global prior)

            # compute the next si, given the sampled zi
            hydra_out = self.p_sip1_given_zi.apply(zi)
            si_step = hydra_out[0]
            if (self.step_type == 'jump'):
                # jump steps always completely overwrite the current guesses
                sip1 = si_step
            elif (self.step_type == 'add'):
                # add steps just update the guesses additively
                sip1 = si + si_step
            elif (self.step_type == 'lstm'):
                # LSTM-style updates with write and erase gates
                write_gate = 1.1 * T.nnet.sigmoid(1.0 + hydra_out[1])
                erase_gate = 1.1 * T.nnet.sigmoid(1.0 + hydra_out[2])
                sip1 = (erase_gate * si) + (write_gate * si_step)
            elif (self.step_type == 'layer'):
                alpha_gate = T.nnet.sigmoid(hydra_out[1])
                sip1 = (alpha_gate * si) + ((1.0 - alpha_gate) * si_step)
            else:
                assert False, "Unknown step type!"

            # compute NLL for the current imputation
            nlli = self._construct_nll_costs(sip1, self.x_out, self.x_mask)
            return sip1, nlli, kldi_q2p, kldi_p2q, kldi_p2g

        # apply scan op for the sequential imputation loop
        self.s0_full = T.alloc(0.0, self.x_in.shape[0], self.x_dim) + self.s0
        init_vals = [self.s0_full, None, None, None, None]
        self.scan_results, self.scan_updates = theano.scan(imp_step_func, \
                    outputs_info=init_vals, sequences=self.zi_zmuv)

        self.si = self.scan_results[0]
        self.nlli = self.scan_results[1]
        self.kldi_q2p = self.scan_results[2]
        self.kldi_p2q = self.scan_results[3]
        self.kldi_p2g = self.scan_results[4]

        # get the initial imputation state
        self.x0 = (self.x_mask * self.x_in) + \
                  ((1.0 - self.x_mask) * self._si_as_x(self.s0_full))

        ######################################################################
        # ALL SYMBOLIC VARS NEEDED FOR THE OBJECTIVE SHOULD NOW BE AVAILABLE #
        ######################################################################

        # shared var learning rate for generator and inferencer
        zero_ary = to_fX(np.zeros((1, )))
        self.lr = theano.shared(value=zero_ary, name='gpsi_lr')
        # shared var momentum parameters for generator and inferencer
        self.mom_1 = theano.shared(value=zero_ary, name='gpsi_mom_1')
        self.mom_2 = theano.shared(value=zero_ary, name='gpsi_mom_2')
        # init parameters for controlling learning dynamics
        self.set_sgd_params()
        # init shared var for weighting nll of data given posterior sample
        self.lam_nll = theano.shared(value=zero_ary, name='gpsi_lam_nll')
        self.set_lam_nll(lam_nll=1.0)
        # init shared var for weighting prior kld against reconstruction
        self.lam_kld_p = theano.shared(value=zero_ary, name='gpsi_lam_kld_p')
        self.lam_kld_q = theano.shared(value=zero_ary, name='gpsi_lam_kld_q')
        self.lam_kld_g = theano.shared(value=zero_ary, name='gpsi_lam_kld_g')
        self.set_lam_kld(lam_kld_p=0.05, lam_kld_q=0.95, lam_kld_g=0.0)
        # init shared var for controlling l2 regularization on params
        self.lam_l2w = theano.shared(value=zero_ary, name='msm_lam_l2w')
        self.set_lam_l2w(1e-5)

        # Grab all of the "optimizable" parameters in "group 1"
        self.joint_params = [self.s0, self.obs_logvar]
        self.joint_params.extend(self.p_zi_given_xi.mlp_params)
        self.joint_params.extend(self.p_sip1_given_zi.mlp_params)
        self.joint_params.extend(self.q_zi_given_xi.mlp_params)

        #################################
        # CONSTRUCT THE KLD-BASED COSTS #
        #################################
        self.kld_p, self.kld_q, self.kld_g = self._construct_kld_costs(p=1.0)
        self.kld_costs = (self.lam_kld_p[0] * self.kld_p) + \
                         (self.lam_kld_q[0] * self.kld_q) + \
                         (self.lam_kld_g[0] * self.kld_g)
        self.kld_cost = T.mean(self.kld_costs)
        #################################
        # CONSTRUCT THE NLL-BASED COSTS #
        #################################
        self.nll_costs = self.nlli[-1]
        self.nll_cost = self.lam_nll[0] * T.mean(self.nll_costs)
        self.nll_bounds = self.nll_costs.ravel() + self.kld_q.ravel()
        self.nll_bound = T.mean(self.nll_bounds)
        ########################################
        # CONSTRUCT THE REST OF THE JOINT COST #
        ########################################
        param_reg_cost = self._construct_reg_costs()
        self.reg_cost = self.lam_l2w[0] * param_reg_cost
        self.joint_cost = self.nll_cost + self.kld_cost + self.reg_cost
        ##############################
        # CONSTRUCT A PER-TRIAL COST #
        ##############################
        self.obs_costs = self.nll_costs + self.kld_costs

        # Get the gradient of the joint cost for all optimizable parameters
        print("Computing gradients of self.joint_cost...")
        self.joint_grads = OrderedDict()
        grad_list = T.grad(self.joint_cost, self.joint_params)
        for i, p in enumerate(self.joint_params):
            self.joint_grads[p] = grad_list[i]

        # Construct the updates for the generator and inferencer networks
        self.joint_updates = get_adam_updates(params=self.joint_params, \
                grads=self.joint_grads, alpha=self.lr, \
                beta1=self.mom_1, beta2=self.mom_2, \
                mom2_init=1e-3, smoothing=1e-4, max_grad_norm=10.0)
        for k, v in self.scan_updates.items():
            self.joint_updates[k] = v

        # Construct a function for jointly training the generator/inferencer
        print("Compiling cost computer...")
        self.compute_raw_costs = self._construct_raw_costs()
        print("Compiling training function...")
        self.train_joint = self._construct_train_joint()
        print("Compiling free-energy sampler...")
        self.compute_fe_terms = self._construct_compute_fe_terms()
        print("Compiling best step cost computer...")
        self.compute_per_step_cost = self._construct_compute_per_step_cost()
        print("Compiling data-guided imputer sampler...")
        self.sample_imputer = self._construct_sample_imputer()
        # make easy access points for some interesting parameters
        #self.gen_inf_weights = self.p_zi_given_xi.shared_layers[0].W
        return
Example #35
    def sym_gradients_new(self, X):
        non_linearity_name = self.parameters["nonlinearity"].get_name()
        assert (non_linearity_name == "sigmoid" or non_linearity_name == "RLU")
        # First element is different (it is predicted from the bias only)
        init_a = T.zeros_like(T.dot(X.T, self.W))  # BxH
        init_x = T.ones_like(X[0])

        def a_i_given_a_im1(x, w, a_prev, x_prev):
            a = a_prev + T.dot(T.shape_padright(x_prev, 1),
                               T.shape_padleft(w, 1))
            return (a, x)

        ([As, _], updates) = theano.scan(a_i_given_a_im1,
                                         sequences=[X, self.W],
                                         outputs_info=[init_a, init_x])
        top_activations = As[-1]
        Xs_m1 = T.set_subtensor(X[1:, :], X[0:-1, :])
        Xs_m1 = T.set_subtensor(Xs_m1[0, :], 1)

        # Reconstruct the previous activations and calculate (for that visible dimension) the density and all the gradients
        def density_and_gradients(x_i, x_im1, w_i, V_alpha, b_alpha, V_mu,
                                  b_mu, V_sigma, b_sigma, activation_factor,
                                  a_i, lp_accum, dP_da_ip1):
            B = T.cast(x_i.shape[0], floatX)
            pot = a_i * activation_factor
            h = self.nonlinearity(pot)  # BxH

            z_alpha = T.dot(h, V_alpha) + T.shape_padleft(b_alpha)
            z_mu = T.dot(h, V_mu) + T.shape_padleft(b_mu)
            z_sigma = T.dot(h, V_sigma) + T.shape_padleft(b_sigma)

            Alpha = T.nnet.softmax(z_alpha)  # BxC
            Mu = z_mu  # BxC
            Sigma = T.exp(z_sigma)  # BxC

            Phi = -constantX(0.5) * T.sqr(
                (Mu - T.shape_padright(x_i, 1)) /
                Sigma) - T.log(Sigma) - constantX(0.5 * np.log(2 * np.pi))
            wPhi = T.maximum(Phi + T.log(Alpha), constantX(-100.0))

            lp_current = -log_sum_exp(wPhi)  # negative log likelihood
            # lp_current_sum = T.sum(lp_current)

            Pi = T.exp(wPhi - T.shape_padright(lp_current, 1))  # #
            dp_dz_alpha = Pi - Alpha  # BxC
            # dp_dz_alpha = T.grad(lp_current_sum, z_alpha)
            gb_alpha = dp_dz_alpha.mean(0, dtype=floatX)  # C
            gV_alpha = T.dot(h.T, dp_dz_alpha) / B  # HxC

            dp_dz_mu = -Pi * (Mu - T.shape_padright(x_i, 1)) / T.sqr(Sigma)
            # dp_dz_mu = T.grad(lp_current_sum, z_mu)
            dp_dz_mu = dp_dz_mu * Sigma  # Heuristic
            gb_mu = dp_dz_mu.mean(0, dtype=floatX)
            gV_mu = T.dot(h.T, dp_dz_mu) / B

            dp_dz_sigma = Pi * (
                T.sqr(T.shape_padright(x_i, 1) - Mu) / T.sqr(Sigma) - 1)
            # dp_dz_sigma = T.grad(lp_current_sum, z_sigma)
            gb_sigma = dp_dz_sigma.mean(0, dtype=floatX)
            gV_sigma = T.dot(h.T, dp_dz_sigma) / B

            dp_dh = T.dot(dp_dz_alpha, V_alpha.T) + T.dot(
                dp_dz_mu, V_mu.T) + T.dot(dp_dz_sigma, V_sigma.T)  # BxH
            if non_linearity_name == "sigmoid":
                dp_dpot = dp_dh * h * (1 - h)
            elif non_linearity_name == "RLU":
                dp_dpot = dp_dh * (pot > 0)

            gfact = (dp_dpot * a_i).sum(1).mean(0, dtype=floatX)  # 1

            dP_da_i = dP_da_ip1 + dp_dpot * activation_factor  # BxH
            gW = T.dot(T.shape_padleft(x_im1, 1), dP_da_i).flatten() / B

            return (a_i -
                    T.dot(T.shape_padright(x_im1, 1), T.shape_padleft(w_i, 1)),
                    lp_accum + lp_current, dP_da_i, gW, gb_alpha, gV_alpha,
                    gb_mu, gV_mu, gb_sigma, gV_sigma, gfact)

        p_accum = T.zeros_like(X[0])
        dP_da_ip1 = T.zeros_like(top_activations)
        ([
            _, ps, _, gW, gb_alpha, gV_alpha, gb_mu, gV_mu, gb_sigma, gV_sigma,
            gfact
        ], updates2) = theano.scan(density_and_gradients,
                                   go_backwards=True,
                                   sequences=[
                                       X, Xs_m1, self.W, self.V_alpha,
                                       self.b_alpha, self.V_mu, self.b_mu,
                                       self.V_sigma, self.b_sigma,
                                       self.activation_rescaling
                                   ],
                                   outputs_info=[
                                       top_activations, p_accum, dP_da_ip1,
                                       None, None, None, None, None, None,
                                       None, None
                                   ])
        # scan with go_backwards returns the matrices in the order they were created, so we have to reverse the order of the rows
        gW = gW[::-1, :]
        gb_alpha = gb_alpha[::-1, :]
        gV_alpha = gV_alpha[::-1, :, :]
        gb_mu = gb_mu[::-1, :]
        gV_mu = gV_mu[::-1, :, :]
        gb_sigma = gb_sigma[::-1, :]
        gV_sigma = gV_sigma[::-1, :, :]
        gfact = gfact[::-1]

        updates.update(updates2)  # Returns None
        return (ps[-1], {
            "W": gW,
            "b_alpha": gb_alpha,
            "V_alpha": gV_alpha,
            "b_mu": gb_mu,
            "V_mu": gV_mu,
            "b_sigma": gb_sigma,
            "V_sigma": gV_sigma,
            "activation_rescaling": gfact
        }, updates)
Example #36
    def _get_gradients_adagrad(self, J):
        """Get the AdaGrad gradients and squared gradients updates.

        The returned gradients still need to be multiplied with the general
        learning rate.

        Parameters
        ----------
        J : theano variable
            cost

        Returns
        -------
        theano variable
            gradients that are adapted by the AdaGrad algorithm
        theano variable
            updated sum of squares for all previous steps
        """
        grads = T.grad(J, [
            self.__dict__[self.updatable_parameters[i]]
            for i in xrange(len(self.updatable_parameters))
        ])

        for i, _ in enumerate(grads):
            grads[i] = debug_print(grads[i],
                                   'grads_' + self.updatable_parameters[i])

        updated_squares = dict()

        # Add squared gradient to the squared gradient matrix for AdaGrad and
        # recalculate the gradient.
        for i, p in enumerate(self.updatable_parameters):

            # We need to handle sparse gradient variables differently
            if isinstance(grads[i], sparse.SparseVariable):
                # Add the squares to the matrix
                power = debug_print(sparse.structured_pow(grads[i], 2.),
                                    'pow_' + p)
                # Remove zeros (might happen when squaring near zero values)
                power = sparse.remove0(power)
                updated_squares[p] = self.__dict__['adagrad_matrix_' +
                                                   p] + power

                # Get only those squares that will be altered, for all others we
                # don't have gradients, i.e., we don't need to consider them at
                # all.
                sqrt_matrix = sparse.sp_ones_like(power)
                sqrt_matrix = debug_print(updated_squares[p] * sqrt_matrix,
                                          'adagrad_squares_subset_' + p)

                # Take the square root of the matrix subset.
                sqrt_matrix = debug_print(sparse.sqrt(sqrt_matrix),
                                          'adagrad_sqrt_' + p)
                # Calc 1. / the square root.
                sqrt_matrix = debug_print(
                    sparse.structured_pow(sqrt_matrix, -1.),
                    'adagrad_pow-1_' + p)
                grads[i] = sparse.mul(grads[i], sqrt_matrix)
            else:
                power = debug_print(T.pow(grads[i], 2.), 'pow_' + p)
                updated_squares[p] = self.__dict__['adagrad_matrix_' +
                                                   p] + power

                # Call sqrt only for those items that are non-zero.
                denominator = T.switch(
                    T.neq(updated_squares[p], 0.0), T.sqrt(updated_squares[p]),
                    T.ones_like(updated_squares[p], dtype=floatX))
                grads[i] = T.mul(grads[i], 1. / denominator)

            updated_squares[p] = debug_print(updated_squares[p],
                                             'upd_squares_' + p)

        for i, _ in enumerate(grads):
            grads[i] = debug_print(
                grads[i], 'grads_updated_' + self.updatable_parameters[i])

        return grads, updated_squares
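As a quick illustration of the dense branch above, here is a minimal NumPy sketch (the names are made up, not from the class) of the AdaGrad scaling: accumulate squared gradients and divide the raw gradient by the square root of the accumulator, guarding against division by zero the same way the T.switch above does.

import numpy as np

def adagrad_scale(grad, squared_acc):
    # accumulate squared gradients, then scale the gradient by 1/sqrt(accumulator)
    squared_acc = squared_acc + grad ** 2
    denominator = np.where(squared_acc != 0.0, np.sqrt(squared_acc), 1.0)
    return grad / denominator, squared_acc

g = np.array([0.5, -2.0, 0.0])
scaled, acc = adagrad_scale(g, np.zeros_like(g))
# scaled == [1., -1., 0.]: every non-zero gradient is normalized on the first step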
Ejemplo n.º 37
0
def get_elementwise_objective(qvalues,
                              actions,
                              rewards,
                              is_alive="always",
                              qvalues_target=None,
                              state_values_target=None,
                              n_steps=1,
                              gamma_or_gammas=0.99,
                              crop_last=True,
                              state_values_target_after_end="zeros",
                              consider_reference_constant=True,
                              aggregation_function="deprecated",
                              force_end_at_last_tick=False,
                              return_reference=False,
                              loss_function=squared_error):
    """
    Returns the squared error between predicted and reference Q-values according to the n-step Q-learning algorithm

        Qreference(state,action) = reward(state,action) + gamma*reward(state_1,action_1) + ... + gamma^n * max[action_n]( Q(state_n,action_n) )
        loss = mean over (Qvalues - Qreference)**2

    :param qvalues: [batch,tick,actions] - predicted qvalues
    :param actions: [batch,tick] - committed actions
    :param rewards: [batch,tick] - immediate rewards for taking actions at given time ticks
    :param is_alive: [batch,tick] - whether given session is still active at given tick. Defaults to always active.

    :param qvalues_target: Q-values used when computing the reference (e.g. r + gamma*Q(s',a_max)), shape [batch,tick,actions]
        examples:
        (default) If None, uses current Qvalues.
        Older snapshot Qvalues (e.g. from a target network)

    :param state_values_target: state values V(s) used when computing the reference (e.g. r + gamma*V(s')), shape [batch_size,seq_length]
        double q-learning V(s) = Q_old(s,argmax Q_new(s,a))
        expected_value_sarsa V(s) = E_a~pi(a|s) Q(s,a)
        state values from teacher network (knowledge transfer)

    Provide at most one of qvalues_target and state_values_target (or neither), never both at once

    :param n_steps: if an integer is given, uses the n-step Q-learning algorithm.
            If 1 (default), this works exactly like normal Q-learning.
            If None, propagates rewards throughout the whole sequence of state-action pairs.

    :param gamma_or_gammas: delayed reward discounts: a single value or array[batch,tick](can broadcast dimensions).

    :param crop_last: if True, zeroes out the loss at the final tick; if False, computes the loss against Qvalues_after_end

    :param state_values_target_after_end: [batch,1] - symbolic expression for the "next best Q-values" at the last tick,
                            used only when computing reference Q-values.
                            Defaults to T.zeros_like(Q-values[:,0,None,0]). If crop_last=True, the last tick is simply not penalized.
                            If you wish to simply ignore the last tick, use the defaults and crop the output's last tick ( qref[:,:-1] ).
    :param consider_reference_constant: whether or not to zero out gradient flow through the reference Q-values
            (True is highly recommended)

    :param force_end_at_last_tick: if True, forces the session to end at the last tick unless it ended otherwise

    :param return_reference: if True, returns the reference Q-values.
            If False, returns loss_function(reference_qvalues, action_qvalues) masked by is_alive
    :param loss_function: loss_function(V_reference,V_predicted). Defaults to (V_reference-V_predicted)**2.
                            Use to override squared error with different loss (e.g. Huber or MAE)

    :return: elementwise loss over Q-values (squared error by default, using the formula above), masked by is_alive

    """
    if aggregation_function != "deprecated":
        raise NotImplementedError(
            "aggregation function has beed deprecated and removed. You can now manually compute "
            "any V(s) and pass it as state_state_values_target. By default it's qvalues.max(axis=-1)"
        )
    #set defaults and assert shapes
    if is_alive == 'always':
        is_alive = T.ones_like(rewards)
    assert qvalues_target is None or state_values_target is None, "Please provide only one of (qvalues_target," \
                                                                  "state_values_target) or none of them, not both"
    assert actions.ndim == rewards.ndim == is_alive.ndim == 2, "actions, rewards and is_alive must have shape [batch,time]"

    assert qvalues.ndim == 3, "q-values must have shape [batch,time,n_actions]"
    assert qvalues_target is None or qvalues_target.ndim == 3, "qvalues_target must have shape[batch,time,n_actions]]"
    assert state_values_target is None or state_values_target.ndim == 2, "state values must have shape [batch,time]"

    #unless already given V(s), compute V(s) as Q-values of the best actions
    if state_values_target is None:
        state_values_target = T.max(
            qvalues_target if qvalues_target is not None else qvalues, axis=-1)

    # get predicted Q-values for committed actions by both current and target networks
    action_qvalues = get_values_for_actions(qvalues, actions)

    # get reference Q-values via Q-learning algorithm
    reference_qvalues = get_n_step_value_reference(
        state_values=state_values_target,
        rewards=rewards,
        is_alive=is_alive,
        n_steps=n_steps,
        gamma_or_gammas=gamma_or_gammas,
        state_values_after_end=state_values_target_after_end,
        end_at_tmax=force_end_at_last_tick,
        crop_last=crop_last,
    )

    if consider_reference_constant:
        # do not pass gradient through reference Qvalues (since they DO depend on Qvalues by default)
        reference_qvalues = consider_constant(reference_qvalues)

    #If asked, make sure loss equals 0 for the last time-tick.
    if crop_last:
        reference_qvalues = T.set_subtensor(reference_qvalues[:, -1],
                                            action_qvalues[:, -1])

    if return_reference:
        return reference_qvalues
    else:
        # tensor of elementwise squared errors
        elwise_squared_error = loss_function(reference_qvalues, action_qvalues)
        return elwise_squared_error * is_alive
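For intuition, here is a toy NumPy sketch (standalone, values made up; not AgentNet code) of the kind of one-step reference described in the docstring above: r + gamma * max_a Q(s', a), with the bootstrap zeroed once the session is no longer alive.

import numpy as np

qvalues  = np.array([[[1., 2.], [0., 3.], [5., 0.]]])  # [batch, tick, action]
rewards  = np.array([[1., 0., 2.]])                    # [batch, tick]
is_alive = np.array([[1., 1., 0.]])                    # [batch, tick]
gamma = 0.99

state_values = qvalues.max(axis=-1)                    # V(s) = max_a Q(s, a)
next_values  = np.concatenate([state_values[:, 1:], np.zeros((1, 1))], axis=1)
next_alive   = np.concatenate([is_alive[:, 1:], np.zeros((1, 1))], axis=1)
reference = rewards + gamma * next_values * next_alive
# reference[0] == [1 + 0.99*3, 0., 2.]  (no bootstrap once the session has ended)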
Ejemplo n.º 38
0
    def __init__(self,
                 input,
                 n_in,
                 index,
                 theta=None,
                 W=None,
                 b=None):  # input is one minibatch whose unit is a group of races, not a single sample

        n_out = 1  # For the conditional logit (CL) model there is no separate decision plane per class; there is always just one value: each horse's probability of winning

        # Pack W and b into theta to make T.grad convenient

        if theta is None:
            self.theta = theano.shared(
                value=numpy.zeros(n_in * n_out + n_out,
                                  dtype=theano.config.floatX
                                  #dtype='float32'
                                  ),
                name='theta',
                borrow=True)
        else:
            self.theta = theta

        _W = self.theta[0:n_in * n_out].reshape((n_in, n_out))
        _b = self.theta[n_in * n_out:n_in * n_out + n_out]

        if W is None:
            self.W = _W
            self.b = _b
        else:
            self.W = W
            self.b = b

        # Exponentiate the linear scores, then normalize within each group to get the final values
        _raw_w = T.exp(T.dot(input, self.W) + self.b)

        # Compute the sum of exp within each race group
        def cumsum_within_group(_start, _index, _race):
            start_point = _index[_start]
            stop_point = _index[_start + 1]
            return T.sum(_race[start_point:stop_point], dtype='float32')

        # _cumsum holds the sum of exp for each group
        _cumsum, _ = theano.scan(cumsum_within_group,
                                 sequences=[T.arange(index.shape[0] - 1)],
                                 non_sequences=[index, _raw_w])

        # Build a rep(cumsum, times) sequence so each horse's probability can be obtained by direct division
        # _times stores the number of horses in each race
        self._times, _ = theano.scan(
            fn=lambda i, index: index[i + 1] - index[i],
            sequences=[T.arange(index.shape[0] - 1)],
            non_sequences=index)

        _raceprobdiv = T.ones_like(_raw_w)

        # Trick: build a sequence of the same length and change its values with T.set_subtensor; scan does not allow outputs whose length differs per step, so concatenation cannot be used
        def change_race_prob_div(_i, _change, _rep, _times, _item):
            _change = T.set_subtensor(
                _change[_rep[_i]:_rep[_i + 1]],
                T.reshape(T.alloc(_item[_i], _times[_i]), (_times[_i], 1)))
            return _change

        # _race_prob_div holds, for every position, the normalization value to divide by
        _race_prob_div, _ = theano.scan(
            fn=change_race_prob_div,
            sequences=[T.arange(index.shape[0] - 1)],
            outputs_info=[_raceprobdiv],
            non_sequences=[index, self._times, _cumsum])

        # Normalized probabilities. The most important part of __init__ is computing each horse's probability; in plain logistic regression this needs no label, a softmax is enough
        self.race_prob = _raw_w / _race_prob_div[-1]

        self.mean_neg_loglikelihood = None

        self.neg_log_likelihood = None

        self.pos_log_likelihood = None

        self.r_square = None

        self.r_error = None

        self.params = [self.W, self.b]
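What the two scans above compute is just a softmax restricted to each race. A minimal NumPy sketch of that per-group normalization (hypothetical scores and group offsets, not taken from the class):

import numpy as np

def grouped_softmax(scores, index):
    # index holds the start offset of each race plus the total length at the end
    probs = np.exp(scores)
    for start, stop in zip(index[:-1], index[1:]):
        probs[start:stop] /= probs[start:stop].sum()
    return probs

scores = np.array([0.2, 1.0, -0.3, 0.5, 0.5])
index = np.array([0, 3, 5])          # two races: horses 0..2 and horses 3..4
race_prob = grouped_softmax(scores, index)
# race_prob[:3].sum() == 1.0 and race_prob[3:].sum() == 1.0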
Ejemplo n.º 39
0
def ones_like(x):
    return T.ones_like(x)
Ejemplo n.º 40
0
def test_scan_debugprint5():

    k = tensor.iscalar("k")
    A = tensor.dvector("A")

    # Symbolic description of the result
    result, updates = theano.scan(fn=lambda prior_result, A: prior_result * A,
                                  outputs_info=tensor.ones_like(A),
                                  non_sequences=A,
                                  n_steps=k)

    final_result = tensor.grad(result[-1].sum(), A)

    output_str = theano.printing.debugprint(final_result, file='str')
    lines = []
    for line in output_str.split('\n'):
        lines += [line]

    expected_output = """Subtensor{int64} [id A] ''
    |for{cpu,grad_of_scan_fn}.1 [id B] ''
    | |Elemwise{sub,no_inplace} [id C] ''
    | | |Subtensor{int64} [id D] ''
    | | | |Shape [id E] ''
    | | | | |for{cpu,scan_fn} [id F] ''
    | | | |   |k [id G]
    | | | |   |IncSubtensor{Set;:int64:} [id H] ''
    | | | |   | |AllocEmpty{dtype='float64'} [id I] ''
    | | | |   | | |Elemwise{add,no_inplace} [id J] ''
    | | | |   | | | |k [id G]
    | | | |   | | | |Subtensor{int64} [id K] ''
    | | | |   | | |   |Shape [id L] ''
    | | | |   | | |   | |Rebroadcast{0} [id M] ''
    | | | |   | | |   |   |DimShuffle{x,0} [id N] ''
    | | | |   | | |   |     |Elemwise{second,no_inplace} [id O] ''
    | | | |   | | |   |       |A [id P]
    | | | |   | | |   |       |DimShuffle{x} [id Q] ''
    | | | |   | | |   |         |TensorConstant{1.0} [id R]
    | | | |   | | |   |Constant{0} [id S]
    | | | |   | | |Subtensor{int64} [id T] ''
    | | | |   | |   |Shape [id U] ''
    | | | |   | |   | |Rebroadcast{0} [id M] ''
    | | | |   | |   |Constant{1} [id V]
    | | | |   | |Rebroadcast{0} [id M] ''
    | | | |   | |ScalarFromTensor [id W] ''
    | | | |   |   |Subtensor{int64} [id K] ''
    | | | |   |A [id P]
    | | | |Constant{0} [id X]
    | | |TensorConstant{1} [id Y]
    | |Subtensor{:int64:} [id Z] ''
    | | |Subtensor{::int64} [id BA] ''
    | | | |Subtensor{:int64:} [id BB] ''
    | | | | |for{cpu,scan_fn} [id F] ''
    | | | | |Constant{-1} [id BC]
    | | | |Constant{-1} [id BD]
    | | |ScalarFromTensor [id BE] ''
    | |   |Elemwise{sub,no_inplace} [id C] ''
    | |Subtensor{:int64:} [id BF] ''
    | | |Subtensor{:int64:} [id BG] ''
    | | | |Subtensor{::int64} [id BH] ''
    | | | | |for{cpu,scan_fn} [id F] ''
    | | | | |Constant{-1} [id BI]
    | | | |Constant{-1} [id BJ]
    | | |ScalarFromTensor [id BK] ''
    | |   |Elemwise{sub,no_inplace} [id C] ''
    | |Subtensor{::int64} [id BL] ''
    | | |IncSubtensor{Inc;int64::} [id BM] ''
    | | | |Elemwise{second,no_inplace} [id BN] ''
    | | | | |for{cpu,scan_fn} [id BO] ''
    | | | | | |k [id G]
    | | | | | |IncSubtensor{Set;:int64:} [id H] ''
    | | | | | |A [id P]
    | | | | |DimShuffle{x,x} [id BP] ''
    | | | |   |TensorConstant{0.0} [id BQ]
    | | | |IncSubtensor{Inc;int64} [id BR] ''
    | | | | |Elemwise{second,no_inplace} [id BS] ''
    | | | | | |Subtensor{int64::} [id BT] ''
    | | | | | | |for{cpu,scan_fn} [id BO] ''
    | | | | | | |Constant{1} [id BU]
    | | | | | |DimShuffle{x,x} [id BV] ''
    | | | | |   |TensorConstant{0.0} [id BQ]
    | | | | |Elemwise{second} [id BW] ''
    | | | | | |Subtensor{int64} [id BX] ''
    | | | | | | |Subtensor{int64::} [id BT] ''
    | | | | | | |Constant{-1} [id BY]
    | | | | | |DimShuffle{x} [id BZ] ''
    | | | | |   |Elemwise{second,no_inplace} [id CA] ''
    | | | | |     |Sum{acc_dtype=float64} [id CB] ''
    | | | | |     | |Subtensor{int64} [id BX] ''
    | | | | |     |TensorConstant{1.0} [id R]
    | | | | |Constant{-1} [id BY]
    | | | |Constant{1} [id BU]
    | | |Constant{-1} [id CC]
    | |Alloc [id CD] ''
    | | |TensorConstant{0.0} [id BQ]
    | | |Elemwise{add,no_inplace} [id CE] ''
    | | | |Elemwise{sub,no_inplace} [id C] ''
    | | | |TensorConstant{1} [id Y]
    | | |Subtensor{int64} [id CF] ''
    | |   |Shape [id CG] ''
    | |   | |A [id P]
    | |   |Constant{0} [id CH]
    | |A [id P]
    |Constant{-1} [id CI]

    Inner graphs of the scan ops:

    for{cpu,grad_of_scan_fn}.1 [id B] ''
    >Elemwise{add,no_inplace} [id CJ] ''
    > |Elemwise{mul} [id CK] ''
    > | |<TensorType(float64, vector)> [id CL] -> [id BL]
    > | |A_copy [id CM] -> [id P]
    > |<TensorType(float64, vector)> [id CN] -> [id BL]
    >Elemwise{add,no_inplace} [id CO] ''
    > |Elemwise{mul} [id CP] ''
    > | |<TensorType(float64, vector)> [id CL] -> [id BL]
    > | |<TensorType(float64, vector)> [id CQ] -> [id Z]
    > |<TensorType(float64, vector)> [id CR] -> [id CD]

    for{cpu,scan_fn} [id F] ''
    >Elemwise{mul,no_inplace} [id CS] ''
    > |<TensorType(float64, vector)> [id CT] -> [id H]
    > |A_copy [id CU] -> [id P]

    for{cpu,scan_fn} [id F] ''
    >Elemwise{mul,no_inplace} [id CS] ''

    for{cpu,scan_fn} [id F] ''
    >Elemwise{mul,no_inplace} [id CS] ''

    for{cpu,scan_fn} [id BO] ''
    >Elemwise{mul,no_inplace} [id CS] ''

    for{cpu,scan_fn} [id BO] ''
    >Elemwise{mul,no_inplace} [id CS] ''"""

    for truth, out in zip(expected_output.split("\n"), lines):
        assert truth.strip() == out.strip()
Ejemplo n.º 41
0
def ones_like(x, dtype=None, name=None):
    """Instantiates an all-ones variable with the same shape as x. """
    return T.ones_like(x, dtype=dtype)
Ejemplo n.º 42
0
    def set_output(self):
        self._output = tensor.ones_like(
            self._prev_layer.output) - self._prev_layer.output
Ejemplo n.º 43
0
    def ready(self):
        args = self.args
        w_emb_layer = self.w_emb_layer
        c_emb_layer = self.c_emb_layer
        r_emb_layers = self.r_emb_layers
        r_matrix_layers = self.r_matrix_layers

        char_dim = self.char_dim = args.char_dim
        char_lstm_dim = self.char_lstm_dim = args.char_lstm_dim
        word_dim = self.word_dim = args.word_dim
        word_lstm_dim = self.word_lstm_dim = args.word_lstm_dim

        dropout = self.dropout = theano.shared(
            np.float64(args.dropout).astype(theano.config.floatX))

        word_ids = self.word_ids = T.ivector('word_ids')
        char_ids = self.char_ids = T.imatrix('char_ids')
        char_lens = self.char_lens = T.fvector('char_lens')
        char_masks = self.char_masks = T.imatrix('char_masks')
        up_ids = self.up_ids = T.imatrix('up_ids')
        up_rels = self.up_rels = T.imatrix('up_rels')
        up_id_masks = self.up_id_masks = T.imatrix('up_id_masks')
        down_ids = self.down_ids = T.imatrix('down_ids')
        down_rels = self.down_rels = T.imatrix('down_rels')
        down_id_masks = self.down_id_masks = T.imatrix('down_id_masks')
        tag_ids = self.tag_ids = T.ivector('tag_ids')

        layers = self.layers = [w_emb_layer, c_emb_layer]
        layers.extend(r_emb_layers)
        layers.extend(r_matrix_layers)

        inputs = self.inputs = []

        inputs.append(self.word_ids)
        inputs.append(self.char_ids)
        inputs.append(self.char_lens)
        inputs.append(self.char_masks)
        inputs.append(self.up_ids)
        inputs.append(self.up_rels)
        inputs.append(self.up_id_masks)
        inputs.append(self.down_ids)
        inputs.append(self.down_rels)
        inputs.append(self.down_id_masks)
        inputs.append(self.tag_ids)
        wslices = w_emb_layer.forward(word_ids)
        cslices = c_emb_layer.forward(char_ids.ravel())
        cslices = cslices.reshape(
            (char_ids.shape[0], char_ids.shape[1], char_dim))
        cslices = cslices.dimshuffle(1, 0, 2)

        bv_ur_slicess = []
        bv_dr_slicess = []
        b_ur_slicess = []
        b_dr_slicess = []

        bv_ur_matrixss = []
        bv_dr_matrixss = []
        b_ur_matrixss = []
        b_dr_matrixss = []

        for r_matrix_layer in r_matrix_layers:
            bv_ur_matrixs = r_matrix_layer.forward1(up_rels.ravel())
            bv_dr_matrixs = r_matrix_layer.forward1(down_rels.ravel())
            b_ur_matrixs = r_matrix_layer.forward2(up_rels.ravel())
            b_dr_matrixs = r_matrix_layer.forward2(down_rels.ravel())
            bv_ur_matrixss.append(
                bv_ur_matrixs.reshape(
                    (up_rels.shape[0], up_rels.shape[1], word_dim, word_dim)))
            bv_dr_matrixss.append(
                bv_dr_matrixs.reshape((down_rels.shape[0], down_rels.shape[1],
                                       word_dim, word_dim)))
            b_ur_matrixss.append(
                b_ur_matrixs.reshape(
                    (up_rels.shape[0], up_rels.shape[1], word_dim, word_dim)))
            b_dr_matrixss.append(
                b_dr_matrixs.reshape((down_rels.shape[0], down_rels.shape[1],
                                      word_dim, word_dim)))

        for r_emb_layer in r_emb_layers:
            bv_ur_slices = r_emb_layer.forward(up_rels.ravel())
            bv_dr_slices = r_emb_layer.forward(down_rels.ravel())
            b_ur_slices = r_emb_layer.forward2(up_rels.ravel())
            b_dr_slices = r_emb_layer.forward2(down_rels.ravel())
            bv_ur_slicess.append(
                bv_ur_slices.reshape(
                    (up_rels.shape[0], up_rels.shape[1], word_dim)))
            bv_dr_slicess.append(
                bv_dr_slices.reshape(
                    (down_rels.shape[0], down_rels.shape[1], word_dim)))
            b_ur_slicess.append(
                b_ur_slices.reshape(
                    (up_rels.shape[0], up_rels.shape[1], word_dim)))
            b_dr_slicess.append(
                b_dr_slices.reshape(
                    (down_rels.shape[0], down_rels.shape[1], word_dim)))

        char_masks = char_masks.dimshuffle(1, 0)

        prev_output = wslices
        prev_size = word_dim

        if char_dim:
            layers.append(
                LSTM(n_in=char_dim,
                     n_out=char_lstm_dim,
                     direction='bi' if args.char_bidirect else 'si'))
            prev_output_2 = cslices
            prev_output_2 = apply_dropout(prev_output_2, dropout, v2=True)
            prev_output_2 = layers[-1].forward_all(cslices, char_masks)
            prev_output_2 = T.sum(prev_output_2, axis=0)
            prev_output_2 = prev_output_2 / (1e-6 * T.ones_like(char_lens) +
                                             char_lens).dimshuffle(0, 'x')

            prev_size += char_lstm_dim
            prev_output = T.concatenate([prev_output, prev_output_2], axis=1)

        prev_output = apply_dropout(prev_output, dropout)
        if args.conv != 0:
            for ind in range(args.clayer):
                layers.append(GraphCNNTensor(
                    n_in=prev_size,
                    n_out=prev_size,
                ))
                residual = True
                if ind == 0:
                    residual = False
                prev_output = layers[-1].forward_all(prev_output,
                                                     up_ids,
                                                     up_id_masks,
                                                     bv_ur_slicess[ind],
                                                     bv_ur_matrixss[ind],
                                                     b_ur_slicess[ind],
                                                     b_ur_matrixss[ind],
                                                     down_ids,
                                                     down_id_masks,
                                                     bv_dr_slicess[ind],
                                                     bv_dr_matrixss[ind],
                                                     b_dr_slicess[ind],
                                                     b_dr_matrixss[ind],
                                                     residual=residual)
                prev_output = apply_dropout(prev_output, dropout)

        prev_size *= 3
        layers.append(
            LSTM(n_in=prev_size,
                 n_out=word_lstm_dim,
                 direction='bi' if args.word_bidirect else 'si'))

        prev_output = prev_output.dimshuffle(0, 'x', 1)
        prev_output = layers[-1].forward_all(prev_output)
        prev_output = prev_output.reshape(
            (prev_output.shape[0], prev_output.shape[-1]))

        prev_size = word_lstm_dim

        layers.append(
            Layer(
                n_in=prev_size,
                n_out=args.classes,
                activation=linear,  #ReLU,
                has_bias=False))

        n_tags = args.classes
        s_len = char_ids.shape[0]
        tags_scores = layers[-1].forward(prev_output)
        transitions = shared((n_tags + 2, n_tags + 2), 'transitions')
        small = -1000
        b_s = np.array([[small] * n_tags + [0, small]]).astype(np.float32)
        e_s = np.array([[small] * n_tags + [small, 0]]).astype(np.float32)
        observations = T.concatenate([tags_scores, small * T.ones((s_len, 2))],
                                     axis=1)

        observations = T.concatenate([b_s, observations, e_s], axis=0)

        real_path_score = tags_scores[T.arange(s_len), tag_ids].sum()
        b_id = theano.shared(value=np.array([n_tags], dtype=np.int32))
        e_id = theano.shared(value=np.array([n_tags + 1], dtype=np.int32))
        padded_tags_ids = T.concatenate([b_id, tag_ids, e_id], axis=0)

        pre_ids = T.arange(s_len + 1)

        s_ids = T.arange(s_len + 1) + 1

        real_path_score += transitions[padded_tags_ids[pre_ids],
                                       padded_tags_ids[s_ids]].sum()

        all_paths_scores = CRFForward(observations, transitions)
        self.nll_loss = nll_loss = -(real_path_score - all_paths_scores)
        preds = CRFForward(observations,
                           transitions,
                           viterbi=True,
                           return_alpha=False,
                           return_best_sequence=True)

        self.pred = preds[1:-1]

        self.l2_sqr = None
        params = self.params = [transitions]
        for layer in layers:
            self.params += layer.params
        for p in self.params:
            if self.l2_sqr is None:
                self.l2_sqr = args.l2_reg * T.sum(p**2)
            else:
                self.l2_sqr += args.l2_reg * T.sum(p**2)

        #for l, i in zip(layers[3:], range(len(layers[3:]))):
        for l, i in zip(
                layers[2 + len(r_emb_layers) + len(r_matrix_layers):],
                range(
                    len(layers[2 + len(r_emb_layers) +
                               len(r_matrix_layers):]))):
            say("layer {}: n_in={}\tn_out={}\n".format(i, l.n_in, l.n_out))

        nparams = sum(len(x.get_value(borrow=True).ravel()) \
                        for x in self.params)
        say("total # parameters: {}\n".format(nparams))

        cost = self.nll_loss + self.l2_sqr

        lr_method_name = args.learning
        lr_method_parameters = {}
        lr_method_parameters['lr'] = args.learning_rate
        updates = Optimization(clip=5.0).get_updates(lr_method_name, cost,
                                                     params,
                                                     **lr_method_parameters)

        f_train = theano.function(inputs=self.inputs,
                                  outputs=[cost, nll_loss],
                                  updates=updates,
                                  allow_input_downcast=True)

        f_eval = theano.function(inputs=self.inputs[:-1],
                                 outputs=self.pred,
                                 allow_input_downcast=True)

        return f_train, f_eval
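To see what real_path_score above adds up, here is a toy NumPy sketch (made-up scores, a 2-tag set so n_tags + 2 = 4, begin id 2 and end id 3): the gold path score is the sum of per-token emission scores plus the chained transition scores over the padded tag sequence.

import numpy as np

tags_scores = np.array([[1.0, 0.2],
                        [0.1, 2.0],
                        [0.5, 0.3]])                    # [sentence_length, n_tags]
transitions = np.random.RandomState(0).randn(4, 4)     # (n_tags + 2) x (n_tags + 2)
tag_ids = np.array([0, 1, 0])                           # gold tags

emission_score = tags_scores[np.arange(len(tag_ids)), tag_ids].sum()
padded = np.concatenate([[2], tag_ids, [3]])            # prepend b_id, append e_id
transition_score = transitions[padded[:-1], padded[1:]].sum()
real_path_score = emission_score + transition_score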
Ejemplo n.º 44
0
    def __theano_build__(self):
        E, V, U, W, b, c = self.E, self.V, self.U, self.W, self.b, self.c

        x_a = T.ivector('x_a')
        x_b = T.ivector('x_b')
        y = T.lvector('y')

        def forward_step(x_t, s_t_prev):
            # Word embedding layer
            x_e = E[:, x_t]
            # GRU layer 1
            z_t = T.nnet.hard_sigmoid(U[0].dot(x_e) + W[0].dot(s_t_prev) +
                                      b[0])
            r_t = T.nnet.hard_sigmoid(U[1].dot(x_e) + W[1].dot(s_t_prev) +
                                      b[1])
            c_t = T.tanh(U[2].dot(x_e) + W[2].dot(s_t_prev * r_t) + b[2])
            s_t = (T.ones_like(z_t) - z_t) * c_t + z_t * s_t_prev
            # directly return the hidden state as the intermediate output
            return [s_t]

        # sentence a vector (states)
        a_s, updates = theano.scan(forward_step,
                                   sequences=x_a,
                                   truncate_gradient=self.bptt_truncate,
                                   outputs_info=T.zeros(self.hidden_dim))

        # sentence b vector (states)
        b_s, updates = theano.scan(forward_step,
                                   sequences=x_b,
                                   truncate_gradient=self.bptt_truncate,
                                   outputs_info=T.zeros(self.hidden_dim))

        # semantic similarity
        # s_sim = manhattan_distance(a_s[-1],b_s[-1])

        # for classification using simple strategy
        sena = a_s[-1]
        senb = b_s[-1]

        combined_s = T.concatenate([sena, senb], axis=0)

        # softmax class
        o = T.nnet.softmax(V.dot(combined_s) + c)[0]

        # in case o contains 0, which would cause inf in the cross-entropy
        eps = np.asarray([1.0e-10] * self.label_dim,
                         dtype=theano.config.floatX)
        o = o + eps
        om = o.reshape((1, o.shape[0]))
        prediction = T.argmax(om, axis=1)
        o_error = T.nnet.categorical_crossentropy(om, y)

        # cost
        cost = T.sum(o_error)

        # updates
        updates = sgd_updates_adadelta(norm=0, params=self.params, cost=cost)

        # monitor parameter
        mV = V * T.ones_like(V)
        mc = c * T.ones_like(c)
        mU = U * T.ones_like(U)
        mW = W * T.ones_like(W)

        gV = T.grad(cost, V)
        gc = T.grad(cost, c)
        gU = T.grad(cost, U)
        gW = T.grad(cost, W)

        mgV = gV * T.ones_like(gV)
        mgc = gc * T.ones_like(gc)
        mgU = gU * T.ones_like(gU)
        mgW = gW * T.ones_like(gW)

        # Assign functions
        self.monitor = theano.function([x_a, x_b],
                                       [sena, senb, mV, mc, mU, mW])
        self.monitor_grad = theano.function([x_a, x_b, y],
                                            [mgV, mgc, mgU, mgW])
        self.predict = theano.function([x_a, x_b], om)
        self.predict_class = theano.function([x_a, x_b], prediction)
        self.ce_error = theano.function([x_a, x_b, y], cost)
        # self.bptt = theano.function([x,y],[dE,dU,dW,db,dV,dc])

        # SGD parameters
        learning_rate = T.scalar('learning_rate')
        decay = T.scalar('decay')

        # rmsprop cache updates
        # find the nan
        self.sgd_step = theano.function(
            [x_a, x_b, y], [],
            updates=updates
            # mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True)
        )
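A plain NumPy version of forward_step above may help (shapes are illustrative and the weights are random; the bias sits inside the nonlinearity): one GRU update interpolates between the previous state and the candidate state with the update gate z.

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def gru_step(x_e, s_prev, U, W, b):
    z = sigmoid(U[0].dot(x_e) + W[0].dot(s_prev) + b[0])        # update gate
    r = sigmoid(U[1].dot(x_e) + W[1].dot(s_prev) + b[1])        # reset gate
    c = np.tanh(U[2].dot(x_e) + W[2].dot(s_prev * r) + b[2])    # candidate state
    return (1.0 - z) * c + z * s_prev                           # new hidden state

hidden_dim, emb_dim = 3, 4
rng = np.random.RandomState(0)
U = rng.randn(3, hidden_dim, emb_dim)
W = rng.randn(3, hidden_dim, hidden_dim)
b = rng.randn(3, hidden_dim)
s_t = gru_step(rng.randn(emb_dim), np.zeros(hidden_dim), U, W, b)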
Ejemplo n.º 45
0
def custom_objective(y_true, y_pred):
    'Custom Objective function'

    y_true = T.flatten(y_true)
    y_pred = T.flatten(y_pred)

    n_seg = 32  # Because we have 32 segments per video.
    nvid = 60
    n_exp = nvid / 2
    Num_d = 32 * nvid

    sub_max = T.ones_like(
        y_pred
    )  # sub_max represents the highest-scoring instances in bags (videos).
    sub_sum_labels = T.ones_like(
        y_true
    )  # It is used to sum the labels in order to distinguish between normal and abnormal videos.
    sub_sum_l1 = T.ones_like(
        y_true
    )  # For holding the concatenation of summation of scores in the bag.
    sub_l2 = T.ones_like(
        y_true)  # For holding the concatenation of L2 of score in the bag.

    for ii in xrange(0, nvid, 1):
        # For Labels
        mm = y_true[ii * n_seg:ii * n_seg + n_seg]
        sub_sum_labels = T.concatenate([
            sub_sum_labels, T.stack(T.sum(mm))
        ])  # Just to keep track of abnormal and normal videos

        # For Features scores
        Feat_Score = y_pred[ii * n_seg:ii * n_seg + n_seg]
        sub_max = T.concatenate(
            [sub_max, T.stack(T.max(Feat_Score))]
        )  # Keep the maximum score of scores of all instances in a Bag (video)
        sub_sum_l1 = T.concatenate([
            sub_sum_l1, T.stack(T.sum(Feat_Score))
        ])  # Keep the sum of scores of all instances in a Bag (video)

        z1 = T.ones_like(Feat_Score)
        z2 = T.concatenate([z1, Feat_Score])
        z3 = T.concatenate([Feat_Score, z1])
        z_22 = z2[31:]
        z_44 = z3[:33]
        z = z_22 - z_44
        z = z[1:32]
        z = T.sum(T.sqr(z))
        sub_l2 = T.concatenate([sub_l2, T.stack(z)])

    # sub_max[Num_d:] means include all elements after Num_d.
    # AllLabels =[2 , 4, 3 ,9 ,6 ,12,7 ,18 ,9 ,14]
    # z=x[4:]
    #[  6.  12.   7.  18.   9.  14.]

    sub_score = sub_max[
        Num_d:]  # We need this step since we have used T.ones_like
    F_labels = sub_sum_labels[
        Num_d:]  # We need this step since we have used T.ones_like
    #  F_labels contains the integer 32 for normal videos and 0 for abnormal videos. This is because of the labeling done at the end of "load_dataset_Train_batch"

    # AllLabels =[2 , 4, 3 ,9 ,6 ,12,7 ,18 ,9 ,14]
    # z=x[:4]
    # [ 2 4 3 9]... This shows 0 to 3 elements

    sub_sum_l1 = sub_sum_l1[
        Num_d:]  # We need this step since we have used T.ones_like
    sub_sum_l1 = sub_sum_l1[:n_exp]
    sub_l2 = sub_l2[Num_d:]  # We need this step since we have used T.ones_like
    sub_l2 = sub_l2[:n_exp]

    indx_nor = theano.tensor.eq(F_labels, 32).nonzero(
    )[0]  # Index of normal videos: Since we labeled 1 for each of 32 segments of normal videos F_labels=32 for normal video
    indx_abn = theano.tensor.eq(F_labels, 0).nonzero()[0]

    n_Nor = n_exp

    Sub_Nor = sub_score[indx_nor]  # Maximum score for each normal video
    Sub_Abn = sub_score[indx_abn]  # Maximum score for each abnormal video

    z = T.ones_like(y_true)
    for ii in xrange(0, n_Nor, 1):
        sub_z = T.maximum(1 - Sub_Abn + Sub_Nor[ii], 0)
        z = T.concatenate([z, T.stack(T.sum(sub_z))])

    z = z[Num_d:]  # We need this step since we have used T.ones_like
    z = T.mean(z, axis=-1) + 0.00008 * T.sum(sub_sum_l1) + 0.00008 * T.sum(
        sub_l2)  # Final loss: ranking term plus the two auxiliary penalties

    return z
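The ranking part of the loss above can be summarized in a couple of NumPy lines (toy scores, not the 32-segment/60-video batch used above): the top-scoring segment of every abnormal video should beat the top-scoring segment of every normal video by a margin of 1.

import numpy as np

max_normal   = np.array([0.2, 0.4])   # max segment score per normal video
max_abnormal = np.array([0.9, 0.3])   # max segment score per abnormal video

# hinge[i, j] = max(0, 1 - abnormal_j + normal_i), summed over abnormal videos
hinge = np.maximum(1.0 - max_abnormal[None, :] + max_normal[:, None], 0.0)
ranking_term = hinge.sum(axis=1).mean()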
Ejemplo n.º 46
0
    def __init__(self,
                 environment,
                 rho=0.9,
                 rms_epsilon=0.0001,
                 momentum=0,
                 clip_delta=0,
                 freeze_interval=1000,
                 batch_size=32,
                 network_type=None,
                 update_rule="rmsprop",
                 batch_accumulator="sum",
                 random_state=np.random.RandomState(),
                 double_Q=False,
                 neural_network=NN):
        """ Initialize environment
        
        """
        QNetwork.__init__(self,environment, batch_size)
        
        self._rho = rho
        self._rms_epsilon = rms_epsilon
        self._momentum = momentum
        self._clip_delta = clip_delta
        self._freeze_interval = freeze_interval
        self._double_Q = double_Q
        self._random_state = random_state
        
        self.update_counter = 0
        
        states=[]   # list of symbolic variables, one for each of the k elements in the belief state
                    # --> [ T.tensor4 if the element's observation is a matrix, T.tensor3 if a vector, T.matrix if a scalar ]
        next_states=[] # same as states, at t+1
        self.states_shared=[] # list of shared variables, one for each of the k elements in the belief state
        self.next_states_shared=[] # same as self.states_shared, at t+1

        for i, dim in enumerate(self._input_dimensions):
            if len(dim) == 3:
                states.append(T.tensor4("%s_%s" % ("state", i)))
                next_states.append(T.tensor4("%s_%s" % ("next_state", i)))

            elif len(dim) == 2:
                states.append(T.tensor3("%s_%s" % ("state", i)))
                next_states.append(T.tensor3("%s_%s" % ("next_state", i)))
                
            elif len(dim) == 1:            
                states.append( T.matrix("%s_%s" % ("state", i)) )
                next_states.append( T.matrix("%s_%s" % ("next_state", i)) )
                
            self.states_shared.append(theano.shared(np.zeros((batch_size,) + dim, dtype=theano.config.floatX) , borrow=False))
            self.next_states_shared.append(theano.shared(np.zeros((batch_size,) + dim, dtype=theano.config.floatX) , borrow=False))
        
        print("Number of observations per state: {}".format(len(self.states_shared)))
        print("For each observation, historySize + ponctualObs_i.shape: {}".format(self._input_dimensions))
                
        rewards = T.col('rewards')
        actions = T.icol('actions')
        terminals = T.icol('terminals')
        thediscount = T.scalar(name='thediscount', dtype=theano.config.floatX)
        thelr = T.scalar(name='thelr', dtype=theano.config.floatX)
        
        Q_net=neural_network(self._batch_size, self._input_dimensions, self._n_actions, self._random_state)
        self.q_vals, self.params, shape_after_conv = Q_net._buildDQN(states)
        
        print("Number of neurons after spatial and temporal convolution layers: {}".format(shape_after_conv))

        self.next_q_vals, self.next_params, shape_after_conv = Q_net._buildDQN(next_states)
        self._resetQHat()

        self.rewards_shared = theano.shared(
            np.zeros((batch_size, 1), dtype=theano.config.floatX),
            broadcastable=(False, True))

        self.actions_shared = theano.shared(
            np.zeros((batch_size, 1), dtype='int32'),
            broadcastable=(False, True))

        self.terminals_shared = theano.shared(
            np.zeros((batch_size, 1), dtype='int32'),
            broadcastable=(False, True))
        
        
        if(self._double_Q==True):
            givens_next={}
            for i, x in enumerate(self.next_states_shared):
                givens_next[ states[i] ] = x

            self.next_q_vals_current_qnet=theano.function([], self.q_vals,
                                          givens=givens_next)

            next_q_curr_qnet = theano.clone(self.next_q_vals)

            argmax_next_q_vals=T.argmax(next_q_curr_qnet, axis=1, keepdims=True)

            max_next_q_vals=self.next_q_vals[T.arange(batch_size),argmax_next_q_vals.reshape((-1,))].reshape((-1, 1))

        else:
            max_next_q_vals=T.max(self.next_q_vals, axis=1, keepdims=True)


        not_terminals=T.ones_like(terminals) - terminals

        target = rewards + not_terminals * thediscount * max_next_q_vals

        q_val=self.q_vals[T.arange(batch_size), actions.reshape((-1,))].reshape((-1, 1))
        # Note: strangely, (target - q_val) led to problems with python 3.5, theano 0.8.0rc and floatX=float32...
        diff = - q_val + target 

        if self._clip_delta > 0:
            # This loss function implementation is taken from
            # https://github.com/spragunr/deep_q_rl
            # If we simply take the squared clipped diff as our loss,
            # then the gradient will be zero whenever the diff exceeds
            # the clip bounds. To avoid this, we extend the loss
            # linearly past the clip point to keep the gradient constant
            # in that regime.
            # 
            # This is equivalent to declaring d loss/d q_vals to be
            # equal to the clipped diff, then backpropagating from
            # there, which is what the DeepMind implementation does.
            quadratic_part = T.minimum(abs(diff), self._clip_delta)
            linear_part = abs(diff) - quadratic_part
            loss_ind = 0.5 * quadratic_part ** 2 + self._clip_delta * linear_part
        else:
            loss_ind = 0.5 * diff ** 2

        if batch_accumulator == 'sum':
            loss = T.sum(loss_ind)
        elif batch_accumulator == 'mean':
            loss = T.mean(loss_ind)
        else:
            raise ValueError("Bad accumulator: {}".format(batch_accumulator))

        givens = {
            rewards: self.rewards_shared,
            actions: self.actions_shared,
            terminals: self.terminals_shared
        }
        
        for i, x in enumerate(self.states_shared):
            givens[ states[i] ] = x 
        for i, x in enumerate(self.next_states_shared):
            givens[ next_states[i] ] = x
                
                
        gparams=[]
        for p in self.params:
            gparam =  T.grad(loss, p)
            gparams.append(gparam)

        updates = []
        
        if update_rule == 'deepmind_rmsprop':
            updates = deepmind_rmsprop(loss, self.params, gparams, thelr, self._rho,
                                       self._rms_epsilon)
        elif update_rule == 'rmsprop':
            for i,(p, g) in enumerate(zip(self.params, gparams)):                
                acc = theano.shared(p.get_value() * 0.)
                acc_new = self._rho * acc + (1 - self._rho) * g ** 2
                gradient_scaling = T.sqrt(acc_new + self._rms_epsilon)
                g = g / gradient_scaling
                updates.append((acc, acc_new))
                updates.append((p, p - thelr * g))

        elif update_rule == 'sgd':
            for i, (param, gparam) in enumerate(zip(self.params, gparams)):
                updates.append((param, param - thelr * gparam))
        else:
            raise ValueError("Unrecognized update: {}".format(update_rule))
    
        
        if(self._double_Q==True):
            self._train = theano.function([thediscount, thelr, next_q_curr_qnet], [loss, loss_ind, self.q_vals], updates=updates,
                                      givens=givens,
                                      on_unused_input='warn')
        else:
            self._train = theano.function([thediscount, thelr], [loss, loss_ind, self.q_vals], updates=updates,
                                      givens=givens,
                                      on_unused_input='warn')
        givens2={}
        for i, x in enumerate(self.states_shared):
            givens2[ states[i] ] = x 

        self._q_vals = theano.function([], self.q_vals,
                                      givens=givens2,
                                      on_unused_input='warn')
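The clip_delta branch above implements the Huber-style trick described in the comment; as a small numeric check (standalone NumPy, not part of the class), the loss is quadratic inside the clip bound and linear outside it, so its gradient never exceeds clip_delta.

import numpy as np

def clipped_loss(diff, clip_delta=1.0):
    quadratic_part = np.minimum(np.abs(diff), clip_delta)
    linear_part = np.abs(diff) - quadratic_part
    return 0.5 * quadratic_part ** 2 + clip_delta * linear_part

d = np.array([-3.0, -0.5, 0.0, 0.5, 3.0])
# clipped_loss(d) == [2.5, 0.125, 0., 0.125, 2.5]; the tails grow linearly in |diff|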
Ejemplo n.º 47
0
    def __init__(self, We_initial, params):
        self.textfile = open(params.outfile, 'w')
        We = theano.shared(We_initial)
        embsize = We_initial.shape[1]
        hidden = params.hidden
        start0 = np.random.uniform(-0.02, 0.02, (1, 26)).astype('float32')
        end0 = np.zeros((1, 26)).astype('float32')
        end0[0, -1] = 1.0
        start = theano.shared(start0)
        end = theano.shared(end0)

        l_in_word = lasagne.layers.InputLayer((None, None))
        l_mask_word = lasagne.layers.InputLayer(shape=(None, None))

        if params.emb == 1:
            l_emb_word = lasagne.layers.EmbeddingLayer(
                l_in_word,
                input_size=We_initial.shape[0],
                output_size=embsize,
                W=We)
        else:
            l_emb_word = lasagne_embedding_layer_2(l_in_word, embsize, We)

        l_lstm_wordf = lasagne.layers.LSTMLayer(l_emb_word,
                                                hidden,
                                                mask_input=l_mask_word)
        l_lstm_wordb = lasagne.layers.LSTMLayer(l_emb_word,
                                                hidden,
                                                mask_input=l_mask_word,
                                                backwards=True)
        l_reshapef = lasagne.layers.ReshapeLayer(l_lstm_wordf, (-1, hidden))
        l_reshapeb = lasagne.layers.ReshapeLayer(l_lstm_wordb, (-1, hidden))
        concat2 = lasagne.layers.ConcatLayer([l_reshapef, l_reshapeb])
        l_local = lasagne.layers.DenseLayer(
            concat2, num_units=25, nonlinearity=lasagne.nonlinearities.linear)
        f_params = lasagne.layers.get_all_params(l_local, trainable=True)

        Wyy0 = np.random.uniform(-0.02, 0.02, (26, 26)).astype('float32')
        Wyy = theano.shared(Wyy0)
        d_params = lasagne.layers.get_all_params(l_local, trainable=True)
        d_params.append(Wyy)
        self.d_params = d_params

        l_in_word_a = lasagne.layers.InputLayer((None, None))
        l_mask_word_a = lasagne.layers.InputLayer(shape=(None, None))
        l_emb_word_a = lasagne_embedding_layer_2(l_in_word_a, embsize,
                                                 l_emb_word.W)

        if params.dropout:
            l_emb_word_a = lasagne.layers.DropoutLayer(l_emb_word_a, p=0.5)

        l_lstm_wordf_a = lasagne.layers.LSTMLayer(l_emb_word_a,
                                                  hidden,
                                                  mask_input=l_mask_word_a)
        l_lstm_wordb_a = lasagne.layers.LSTMLayer(l_emb_word_a,
                                                  hidden,
                                                  mask_input=l_mask_word_a,
                                                  backwards=True)
        l_reshapef_a = lasagne.layers.ReshapeLayer(l_lstm_wordf_a,
                                                   (-1, hidden))
        l_reshapeb_a = lasagne.layers.ReshapeLayer(l_lstm_wordb_a,
                                                   (-1, hidden))
        concat2_a = lasagne.layers.ConcatLayer([l_reshapef_a, l_reshapeb_a])
        if params.dropout:
            concat2_a = lasagne.layers.DropoutLayer(concat2_a, p=0.5)
        l_local_a = lasagne.layers.DenseLayer(
            concat2_a,
            num_units=25,
            nonlinearity=lasagne.nonlinearities.softmax)

        a_params = lasagne.layers.get_all_params(l_local_a, trainable=True)
        self.a_params = a_params

        y_in = T.ftensor3()
        y = T.imatrix()
        g = T.imatrix()
        gmask = T.fmatrix()
        y_mask = T.fmatrix()
        length = T.iscalar()
        # shape: n, L, 1
        #y1 = T.ftensor3()
        # shape: n, 1, 46

        predy0 = lasagne.layers.get_output(l_local_a, {
            l_in_word_a: g,
            l_mask_word_a: gmask
        })
        predy = predy0.reshape((-1, length, 25))
        predy = predy * gmask[:, :, None]

        #newpredy = T.concatenate([predy, y0] , axis=2)
        # n , L, 46, 46
        # predy0: n, L, 25
        # energy loss
        def inner_function(targets_one_step, mask_one_step, prev_label,
                           tg_energy):
            """
                        :param targets_one_step: [batch_size, t]
                        :param prev_label: [batch_size, t]
                        :param tg_energy: [batch_size]
                        :return:
                        """
            new_ta_energy = T.dot(prev_label, Wyy[:-1, :-1])
            new_ta_energy = tg_energy + T.sum(new_ta_energy * targets_one_step,
                                              axis=1)
            tg_energy_t = T.switch(mask_one_step, new_ta_energy, tg_energy)
            return [targets_one_step, tg_energy_t]

        # Input should be provided as (n_batch, n_time_steps, num_labels, num_labels)
        # but scan requires the iterable dimension to be first
        # So, we need to dimshuffle to (n_time_steps, n_batch, num_labels, num_labels)
        local_energy = lasagne.layers.get_output(l_local, {
            l_in_word: g,
            l_mask_word: gmask
        })
        local_energy = local_energy.reshape((-1, length, 25))
        local_energy = local_energy * gmask[:, :, None]

        targets_shuffled = y_in.dimshuffle(1, 0, 2)
        masks_shuffled = gmask.dimshuffle(1, 0)
        target_time0 = targets_shuffled[0]
        initial_energy0 = T.dot(target_time0, Wyy[-1, :-1])

        length_index = T.sum(gmask, axis=1) - 1
        length_index = T.cast(length_index, 'int32')

        l_LM_in = lasagne.layers.InputLayer((None, None, 26))
        l_LM_mask = lasagne.layers.InputLayer(shape=(None, None))
        l_LM_lstm = lasagne.layers.LSTMLayer(l_LM_in,
                                             2 * hidden,
                                             mask_input=l_LM_mask)
        l_reshape_LM = lasagne.layers.ReshapeLayer(l_LM_lstm, (-1, 2 * hidden))
        l_LM = lasagne.layers.DenseLayer(
            l_reshape_LM,
            num_units=26,
            nonlinearity=lasagne.nonlinearities.softmax)

        LM_params = lasagne.layers.get_all_params(l_LM, trainable=True)
        LM_params.append(start)

        f = open('Label_LM.pickle', 'r')
        data = pickle.load(f)
        f.close()
        for idx, p in enumerate(LM_params):
            p.set_value(data[idx])

        initials = [target_time0, initial_energy0]
        [_, target_energies], _ = theano.scan(
            fn=inner_function,
            outputs_info=initials,
            sequences=[targets_shuffled[1:], masks_shuffled[1:]])

        pos_end_target = y_in[T.arange(length_index.shape[0]), length_index]
        """add ground truth labels LM cost"""
        pos_predy_tmp0 = y_in[:, :, 0].reshape((-1, length, 1))
        pos_tmp0 = T.zeros_like(pos_predy_tmp0)
        pos_predy_lm = T.concatenate([y_in, pos_tmp0], axis=2)

        pos_predy_tmp = pos_predy_lm[:, 0, :].reshape((-1, 1, 26))
        pos_tmp = T.ones_like(pos_predy_tmp)

        sos = pos_tmp * (start.dimshuffle('x', 0, 1))
        eos = pos_tmp * (end.dimshuffle('x', 0, 1))
        pos_y_lm_in = T.concatenate([sos, pos_predy_lm], axis=1)
        pos_y_lm_out = T.concatenate([pos_predy_lm, eos], axis=1)

        pos_lm_mask_var = T.concatenate(
            [pos_tmp[:, 0, 0].reshape((-1, 1)), gmask], axis=1)
        pos_LM_out = lasagne.layers.get_output(l_LM, {
            l_LM_in: pos_y_lm_in,
            l_LM_mask: pos_lm_mask_var
        })
        pos_LM_out = pos_LM_out.reshape((-1, length + 1, 26))
        pos_LM_cost = T.sum(T.log(
            T.sum(pos_LM_out[:, :-1, :] * pos_y_lm_out[:, :-1, :], axis=2) +
            eps) * gmask,
                            axis=1)

        pos_cost = target_energies[-1] + T.sum(
            T.sum(local_energy * y_in, axis=2) * gmask, axis=1) + T.dot(
                pos_end_target, Wyy[:-1, -1]) + params.lm * pos_LM_cost
        check = T.sum(T.sum(local_energy * y_in, axis=2) * gmask, axis=1)

        negtargets_shuffled = predy.dimshuffle(1, 0, 2)
        negtarget_time0 = negtargets_shuffled[0]
        neginitial_energy0 = T.dot(negtarget_time0, Wyy[-1, :-1])
        """predict label language cost"""

        neg_predy_tmp0 = predy[:, :, 0].reshape((-1, length, 1))
        neg_tmp0 = T.zeros_like(neg_predy_tmp0)
        neg_predy_lm = T.concatenate([predy, neg_tmp0], axis=2)

        neg_predy_tmp = neg_predy_lm[:, 0, :].reshape((-1, 1, 26))
        neg_tmp = T.ones_like(neg_predy_tmp)

        sos = neg_tmp * (start.dimshuffle('x', 0, 1))
        eos = neg_tmp * (end.dimshuffle('x', 0, 1))

        neg_y_lm_in = T.concatenate([sos, neg_predy_lm], axis=1)
        neg_y_lm_out = T.concatenate([neg_predy_lm, eos], axis=1)

        neg_lm_mask_var = T.concatenate(
            [neg_tmp[:, 0, 0].reshape((-1, 1)), gmask], axis=1)
        neg_LM_out = lasagne.layers.get_output(l_LM, {
            l_LM_in: neg_y_lm_in,
            l_LM_mask: neg_lm_mask_var
        })
        neg_LM_out = neg_LM_out.reshape((-1, length + 1, 26))
        neg_LM_cost = T.sum(T.log(
            T.sum(neg_LM_out[:, :-1, :] * neg_y_lm_out[:, :-1, :], axis=2) +
            eps) * gmask,
                            axis=1)

        neginitials = [negtarget_time0, neginitial_energy0]
        [_, negtarget_energies], _ = theano.scan(
            fn=inner_function,
            outputs_info=neginitials,
            sequences=[negtargets_shuffled[1:], masks_shuffled[1:]])

        neg_end_target = predy[T.arange(length_index.shape[0]), length_index]
        neg_cost = negtarget_energies[-1] + T.sum(
            T.sum(local_energy * predy, axis=2) * gmask, axis=1) + T.dot(
                neg_end_target, Wyy[:-1, -1]) + params.lm * neg_LM_cost

        y_f = y.flatten()
        predy_f = predy.reshape((-1, 25))

        ce_hinge = lasagne.objectives.categorical_crossentropy(
            predy_f + eps, y_f)
        ce_hinge = ce_hinge.reshape((-1, length))
        ce_hinge = T.sum(ce_hinge * gmask, axis=1)

        entropy_term = -T.sum(predy_f * T.log(predy_f + eps), axis=1)
        entropy_term = entropy_term.reshape((-1, length))
        entropy_term = T.sum(entropy_term * gmask, axis=1)

        delta0 = T.sum(abs((y_in - predy)), axis=2) * gmask
        delta0 = T.sum(delta0, axis=1)
        hinge_cost = delta0 + neg_cost - pos_cost
        hinge_cost = hinge_cost * T.gt(hinge_cost, 0)
        d_cost = T.mean(hinge_cost)
        d_cost0 = d_cost
        """select different regulizer"""
        g_cost = -d_cost0 + params.l2 * sum(
            lasagne.regularization.l2(x)
            for x in a_params) + params.l3 * T.mean(ce_hinge)
        ###g_cost = -d_cost0 + params.L2* sum(lasagne.regularization.l2(x) for x in a_params) - params.L31*T.mean(entropy_term)
        d_cost = d_cost0 + params.l2 * sum(
            lasagne.regularization.l2(x) for x in d_params)

        self.a_params = a_params
        updates_g = lasagne.updates.sgd(g_cost, a_params, params.eta)
        updates_g = lasagne.updates.apply_momentum(updates_g,
                                                   a_params,
                                                   momentum=0.9)
        self.train_g = theano.function(
            [g, gmask, y, y_in, length],
            [g_cost, d_cost0, pos_cost, neg_cost, delta0, check],
            updates=updates_g,
            on_unused_input='ignore')
        updates_d = lasagne.updates.adam(d_cost, d_params, 0.001)
        self.train_d = theano.function(
            [g, gmask, y, y_in, length],
            [d_cost, d_cost0, pos_cost, neg_cost, delta0, check],
            updates=updates_d,
            on_unused_input='ignore')

        # test the model and re-tune the inference network
        predy_test = lasagne.layers.get_output(l_local_a, {
            l_in_word_a: g,
            l_mask_word_a: gmask
        },
                                               deterministic=True)
        predy_test = predy_test.reshape((-1, length, 25))
        pred = T.argmax(predy_test, axis=2)
        pg = T.eq(pred, y)
        pg = pg * gmask
        acc = 1.0 * T.sum(pg) / T.sum(gmask)

        negtargets_shuffled_test = predy_test.dimshuffle(1, 0, 2)
        negtarget_time0_test = negtargets_shuffled_test[0]

        neginitial_energy0_test = T.dot(negtarget_time0_test, Wyy[-1, :-1])
        neginitials_test = [negtarget_time0_test, neginitial_energy0_test]
        [_, negtarget_energies_test], _ = theano.scan(
            fn=inner_function,
            outputs_info=neginitials_test,
            sequences=[negtargets_shuffled_test[1:], masks_shuffled[1:]])
        end_test_target = predy_test[T.arange(length_index.shape[0]),
                                     length_index]
        neg_cost_test = negtarget_energies_test[-1] + T.sum(
            T.sum(local_energy * predy_test, axis=2) * gmask, axis=1) + T.dot(
                end_test_target, Wyy[:-1, -1])

        test_cost = -T.mean(neg_cost_test) + params.l3 * T.mean(
            ce_hinge) - params.lm * T.mean(neg_LM_cost)
        test_updates = lasagne.updates.sgd(test_cost, a_params, params.eta)
        test_updates = lasagne.updates.apply_momentum(test_updates,
                                                      a_params,
                                                      momentum=0.9)
        self.test_time_turning = theano.function([g, gmask, y, length],
                                                 test_cost,
                                                 updates=test_updates,
                                                 on_unused_input='ignore')
        self.test_time1 = theano.function([g, gmask, y, y_in, length], [
            acc,
            T.mean(neg_cost),
            T.mean(pos_cost), params.l3 * T.mean(ce_hinge)
        ],
                                          on_unused_input='ignore')
        self.test_time = theano.function([g, gmask, y, length], acc)
        self.test_time2 = theano.function([g, gmask, length], pred)
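The discriminator cost d_cost above is a margin-rescaled structured hinge; a tiny NumPy sketch with made-up numbers shows its shape: the gold labeling must out-score the predicted one by at least the label disagreement delta.

import numpy as np

delta      = np.array([2.0, 0.0])   # per-sentence label disagreement (delta0 above)
score_pred = np.array([5.0, 3.0])   # score of the inference network's prediction (neg_cost)
score_gold = np.array([6.0, 4.0])   # score of the ground-truth labeling (pos_cost)

hinge = np.maximum(delta + score_pred - score_gold, 0.0)
d_cost = hinge.mean()               # == 0.5 for these numbers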
Ejemplo n.º 48
0
	def _get_cost(
		self,
		output,
		truth,
		S,
		B,
		C,
		rescore=False,
		lmbda_coord=5.,
		lmbda_noobj=0.5,
		lmbda_obj=1.,
		min_overlap=1e-5,
		use_overlap=False
		):
		'''
		Calculates the cost for multiple objects in a scene without for loops or scan (which reduces the number of variables
		created in the theano computation graph).  A cell is associated with the ground truth object whose iou with that cell
		is higher than for any other ground truth object, and the rest of the objectness scores are pushed towards zero.

		Returns the cost and a list of variables that I don't want to backpropagate through.

		Params:
		------

		use_overlap: Yolo, as described in the original paper, assigns a ground truth label to a cell if the ground truth box
				overlaps with the cell at all.  I've found that on new images with many smaller objects, several objects can
				overlap a single cell, which produces a sort of averaged bounding box that looks pretty bad.  With use_overlap,
				a cell is not assigned to a ground truth label unless it overlaps by some semi-significant amount.
		'''
		
		# calculate height/width of individual cell
		block_height, block_width = 1. / S[0], 1./ S[1]

		# get the offset of each cell
		offset_x, offset_y = meshgrid2D(T.arange(0,1,block_width), T.arange(0,1,block_height))

		# get indices for x,y,w,h,object-ness for easy access
		x_idx, y_idx = T.arange(0,5*B,5), T.arange(1,5*B, 5)
		w_idx, h_idx = T.arange(2,5*B,5), T.arange(3,5*B,5)
		conf_idx = T.arange(4,5*B,5)

		# Get position predictions with offsets.
		pred_x = (output[:,x_idx] + offset_x.dimshuffle('x','x',0,1)).dimshuffle(0,'x',1,2,3)
		pred_y = (output[:,y_idx] + offset_y.dimshuffle('x','x',0,1)).dimshuffle(0,'x',1,2,3)
		pred_w, pred_h = output[:,w_idx].dimshuffle(0,'x',1,2,3), output[:,h_idx].dimshuffle(0,'x',1,2,3)
		#pred_w, pred_h = T.exp(pred_w), T.exp(pred_h)		
		pred_conf = output[:,conf_idx].dimshuffle(0,'x',1,2,3)
		pred_class = output[:,-C:].dimshuffle(0,'x',1,2,3)
		
		#pred_w, pred_h = T.maximum(pred_w, 0.), T.maximum(pred_h, 0.)

		x_idx, y_idx = T.arange(0,truth.shape[1],4+C), T.arange(1,truth.shape[1],4+C)
		w_idx, h_idx = T.arange(2,truth.shape[1],4+C), T.arange(3,truth.shape[1],4+C)
		class_idx,_ = theano.scan(
			lambda x: T.arange(x,x+C,1),
			sequences = T.arange(4,truth.shape[1],4+C)
		)

		truth_x, truth_y = truth[:,x_idx], truth[:,y_idx]
		truth_w, truth_h = truth[:,w_idx], truth[:,h_idx]
		truth_class = truth[:, class_idx]
		
		# Get intersection region bounding box coordinates
		xi = T.maximum(pred_x, truth_x.dimshuffle(0,1,'x','x','x'))
		xf = T.minimum(pred_x + pred_w, (truth_x + truth_w).dimshuffle(0,1,'x','x','x'))
		yi = T.maximum(pred_y, truth_y.dimshuffle(0,1,'x','x','x'))
		yf = T.minimum(pred_y + pred_h, (truth_y + truth_h).dimshuffle(0,1,'x','x','x'))
		w, h = T.maximum(xf - xi, 0.), T.maximum(yf - yi, 0.)

		# Calculate iou score for predicted boxes and truth
		isec = w * h
		union = (pred_w * pred_h) + (truth_w * truth_h).dimshuffle(0,1,'x','x','x') - isec
		iou = T.maximum(isec/union, 0.)

		# Calculate squared error for boxes which have 0 iou score
		squared_error = (pred_x - truth_x.dimshuffle(0,1,'x','x','x'))**2 + (pred_y - truth_y.dimshuffle(0,1,'x','x','x'))**2 + \
			(pred_w - truth_w.dimshuffle(0,1,'x','x','x'))**2 + (pred_h - truth_h.dimshuffle(0,1,'x','x','x'))**2

		# Get index matrix representing the max along the 1st dimension of the iou score (represents the 'responsible' box).
		maxval_idx, _ = meshgrid2D(T.arange(B), T.arange(truth.shape[0]))
		maxval_idx = maxval_idx.dimshuffle(0,'x',1,'x','x')
		maxval_idx = T.repeat(T.repeat(maxval_idx,S[0],3),S[1],4)

		# determine which box is responsible: the box with the highest iou score (if iou > 0), otherwise the one with the smallest squared error.
		greater_iou = T.eq(maxval_idx, iou.argmax(axis=2).dimshuffle(0,1,'x',2,3))
		smaller_se = T.eq(maxval_idx, squared_error.argmin(axis=2).dimshuffle(0,1,'x',2,3))
		box_is_resp = T.switch(iou.max(axis=2, keepdims=True) > 0, greater_iou, smaller_se)
		
		# Get matrix for the width/height of each cell
		width, height = T.ones(S) / S[1], T.ones(S) / S[0]
		width, height = width.dimshuffle('x','x',0,1), height.dimshuffle('x','x',0,1)
		offset_x, offset_y = offset_x.dimshuffle('x','x',0,1), offset_y.dimshuffle('x','x',0,1)

		# Get bounding box for intersection between CELL and ground truth box.
		xi = T.maximum(offset_x, truth_x.dimshuffle(0,1,'x','x'))
		xf = T.minimum(offset_x + width, (truth_x + truth_w).dimshuffle(0,1,'x','x'))
		yi = T.maximum(offset_y, truth_y.dimshuffle(0,1,'x','x'))
		yf = T.minimum(offset_y + height, (truth_y + truth_h).dimshuffle(0,1,'x','x'))
		w, h = T.maximum(xf - xi, 0.), T.maximum(yf - yi, 0.)

		# Calculate iou score for the cell.
		isec = w * h
		if not use_overlap:
			union = (width * height) + (truth_w* truth_h).dimshuffle(0,1,'x','x') - isec
			iou_cell = T.maximum(isec/union, 0.).dimshuffle(0,1,'x',2,3) # * (np.prod(S)) # normalize the iou to make more sense
		else:
			iou_cell = T.maximum(isec / (width * height), 0.).dimshuffle(0,1,'x',2,3)
		
		maxval_idx, _ = meshgrid2D(T.arange(iou_cell.shape[1]), T.arange(iou_cell.shape[0]))
		maxval_idx = maxval_idx.dimshuffle(0,1,'x','x','x')
		maxval_idx = T.repeat(T.repeat(T.repeat(maxval_idx, B, 2), S[0], 3), S[1], 4)
		
		obj_for_cell = T.eq(maxval_idx, iou_cell.argmax(axis=1).dimshuffle(0,'x',1,2,3))
			
		# Get logical matrix indicating whether the cell's iou score exceeds the minimum required to be considered overlapping the ground truth.
		cell_intersects = (iou_cell > min_overlap)
		
		obj_in_cell_and_resp = T.bitwise_and(T.bitwise_and(cell_intersects, box_is_resp), obj_for_cell)
		conf_is_zero = T.bitwise_and(
			bitwise_not(T.bitwise_and(cell_intersects, box_is_resp)),
			obj_for_cell
		)
		conf_is_zero = conf_is_zero.sum(axis=1, keepdims=True)
		
		# repeat the predicted class probabilities for each ground truth object.
		pred_class = T.repeat(pred_class, truth.shape[1] // (4 + C), axis=1)

		# repeat the ground truth for class probabilities for each cell.
		truth_class_rep = T.repeat(T.repeat(truth_class.dimshuffle(0,1,2,'x','x'), S[0], axis=3), S[1], axis=4)
		cell_intersects = T.repeat(cell_intersects, C, axis=2)

		if not rescore:
			iou = T.ones_like(iou)
		cost = T.sum((pred_conf - iou)[obj_in_cell_and_resp.nonzero()]**2) + \
			lmbda_noobj * T.sum((pred_conf[conf_is_zero.nonzero()])**2) + \
		 	lmbda_coord * T.sum((pred_x - truth_x.dimshuffle(0,1,'x','x','x'))[obj_in_cell_and_resp.nonzero()]**2) + \
		 	lmbda_coord * T.sum((pred_y - truth_y.dimshuffle(0,1,'x','x','x'))[obj_in_cell_and_resp.nonzero()]**2) + \
			lmbda_coord * T.sum((safe_sqrt(pred_w) - safe_sqrt(truth_w.dimshuffle(0,1,'x','x','x')))[obj_in_cell_and_resp.nonzero()]**2) + \
			lmbda_coord * T.sum((safe_sqrt(pred_h) - safe_sqrt(truth_h.dimshuffle(0,1,'x','x','x')))[obj_in_cell_and_resp.nonzero()]**2) + \
			lmbda_obj * T.sum(((pred_class - truth_class_rep)[cell_intersects.nonzero()])**2)

		cost /= T.maximum(1., truth.shape[0])
		return cost, [iou]
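
The cost function above relies on three helpers (meshgrid2D, safe_sqrt and bitwise_not) that are not part of this excerpt. A minimal sketch of plausible definitions, assuming meshgrid2D mirrors numpy.meshgrid for symbolic 1-D tensors:

import theano.tensor as T

def meshgrid2D(x, y):
    # Hypothetical helper: symbolic analogue of numpy.meshgrid(x, y).
    xx = T.outer(T.ones_like(y), x)
    yy = T.outer(y, T.ones_like(x))
    return xx, yy

def safe_sqrt(x, eps=1e-6):
    # Hypothetical helper: square root clamped away from zero so its gradient
    # stays finite for degenerate box widths/heights.
    return T.sqrt(T.maximum(x, eps))

def bitwise_not(x):
    # Hypothetical helper: logical negation of a {0, 1} integer tensor.
    return T.eq(x, 0)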
Ejemplo n.º 49
0
    def __init__(self, model, n_in, n_out, state_bounds, action_bounds,
                 reward_bound, settings_):

        super(Distillation,
              self).__init__(model, n_in, n_out, state_bounds, action_bounds,
                             reward_bound, settings_)

        # create a small convolutional neural network

        ### Load expert policy files
        self._expert_policies = []
        file_name_ = ""
        for i in range(len(self.getSettings()['expert_policy_files'])):
            file_name = self.getSettings(
            )['expert_policy_files'][i] + '/' + self.getSettings(
            )['model_type'] + '/' + getAgentName() + '.pkl'
            if (file_name_ == file_name):
                ## To help save memory when experts are the same
                # model_ = self._expert_policies[len(self._expert_policies)-1]
                self._expert_policies.append(model_)
            else:
                print("Loading pre compiled network: ", file_name)
                f = open(file_name, 'rb')
                model_ = dill.load(f)
                # model.setSettings(settings)
                f.close()
                self._expert_policies.append(model_)
            file_name_ = file_name

        self._actor_buffer_states = []
        self._actor_buffer_result_states = []
        self._actor_buffer_actions = []
        self._actor_buffer_rewards = []
        self._actor_buffer_falls = []
        self._actor_buffer_diff = []

        self._NotFallen = T.bcol("Not_Fallen")
        ## because float64 <= float32 * int32, need to use int16 or int8
        self._NotFallen.tag.test_value = np.zeros((self._batch_size, 1),
                                                  dtype=np.dtype('int8'))

        self._NotFallen_shared = theano.shared(np.zeros((self._batch_size, 1),
                                                        dtype='int8'),
                                               broadcastable=(False, True))

        self._tmp_diff = T.col("Tmp_Diff")
        self._tmp_diff.tag.test_value = np.zeros(
            (self._batch_size, 1),
            dtype=np.dtype(self.getSettings()['float_type']))

        self._tmp_diff_shared = theano.shared(np.zeros(
            (self._batch_size, 1), dtype=self.getSettings()['float_type']),
                                              broadcastable=(False, True))
        """
        self._target_shared = theano.shared(
            np.zeros((self._batch_size, 1), dtype='float64'),
            broadcastable=(False, True))
        """
        self._critic_regularization_weight = self.getSettings(
        )["critic_regularization_weight"]
        self._critic_learning_rate = self.getSettings()["critic_learning_rate"]
        ## Target network
        self._modelTarget = copy.deepcopy(model)

        self._q_valsA = lasagne.layers.get_output(
            self._model.getCriticNetwork(),
            self._model.getStateSymbolicVariable(),
            deterministic=True)
        self._q_valsA_drop = lasagne.layers.get_output(
            self._model.getCriticNetwork(),
            self._model.getStateSymbolicVariable(),
            deterministic=False)
        self._q_valsNextState = lasagne.layers.get_output(
            self._model.getCriticNetwork(),
            self._model.getResultStateSymbolicVariable(),
            deterministic=True)
        self._q_valsTargetNextState = lasagne.layers.get_output(
            self._modelTarget.getCriticNetwork(),
            self._model.getResultStateSymbolicVariable(),
            deterministic=True)
        self._q_valsTarget = lasagne.layers.get_output(
            self._modelTarget.getCriticNetwork(),
            self._model.getStateSymbolicVariable(),
            deterministic=True)
        self._q_valsTarget_drop = lasagne.layers.get_output(
            self._modelTarget.getCriticNetwork(),
            self._model.getStateSymbolicVariable(),
            deterministic=False)

        self._q_valsActA = lasagne.layers.get_output(
            self._model.getActorNetwork(),
            self._model.getStateSymbolicVariable(),
            deterministic=True)
        self._q_valsActTarget = lasagne.layers.get_output(
            self._modelTarget.getActorNetwork(),
            self._model.getStateSymbolicVariable(),
            deterministic=True)
        self._q_valsActA_drop = lasagne.layers.get_output(
            self._model.getActorNetwork(),
            self._model.getStateSymbolicVariable(),
            deterministic=False)

        self._q_func = self._q_valsA
        self._q_funcTarget = self._q_valsTarget
        self._q_func_drop = self._q_valsA_drop
        self._q_funcTarget_drop = self._q_valsTarget_drop
        self._q_funcAct = self._q_valsActA
        self._q_funcAct_drop = self._q_valsActA_drop

        # self._target = (self._model.getRewardSymbolicVariable() + (np.array([self._discount_factor] ,dtype=np.dtype(self.getSettings()['float_type']))[0] * self._q_valsTargetNextState )) * self._NotFallen
        # self._target = self._model.getRewardSymbolicVariable() + ((self._discount_factor * self._q_valsTargetNextState ) * self._NotFallen) + (self._NotFallen - 1)
        self._target = self._model.getRewardSymbolicVariable() + (
            self._discount_factor * self._q_valsTargetNextState)
        self._diff = self._target - self._q_func
        self._diff_drop = self._target - self._q_func_drop
        # loss = 0.5 * self._diff ** 2
        loss = T.pow(self._diff, 2)
        self._loss = T.mean(loss)
        self._loss_drop = T.mean(0.5 * self._diff_drop**2)

        self._params = lasagne.layers.helper.get_all_params(
            self._model.getCriticNetwork())
        self._actionParams = lasagne.layers.helper.get_all_params(
            self._model.getActorNetwork())
        self._givens_ = {
            self._model.getStateSymbolicVariable():
            self._model.getStates(),
            self._model.getResultStateSymbolicVariable():
            self._model.getResultStates(),
            self._model.getRewardSymbolicVariable():
            self._model.getRewards(),
            # self._NotFallen: self._NotFallen_shared
            # self._model.getActionSymbolicVariable(): self._actions_shared,
        }
        self._actGivens = {
            self._model.getStateSymbolicVariable():
            self._model.getStates(),
            # self._model.getResultStateSymbolicVariable(): self._model.getResultStates(),
            # self._model.getRewardSymbolicVariable(): self._model.getRewards(),
            self._model.getActionSymbolicVariable():
            self._model.getActions(),
            # self._NotFallen: self._NotFallen_shared
            self._tmp_diff:
            self._tmp_diff_shared
        }

        self._critic_regularization = (
            self._critic_regularization_weight *
            lasagne.regularization.regularize_network_params(
                self._model.getCriticNetwork(), lasagne.regularization.l2))
        self._actor_regularization = (
            (self._regularization_weight *
             lasagne.regularization.regularize_network_params(
                 self._model.getActorNetwork(), lasagne.regularization.l2)))
        if (self.getSettings()['use_previous_value_regularization']):
            self._actor_regularization = self._actor_regularization + (
                (self.getSettings()['previous_value_regularization_weight']) *
                change_penalty(self._model.getActorNetwork(),
                               self._modelTarget.getActorNetwork()))
        elif ('regularization_type' in self.getSettings() and
              (self.getSettings()['regularization_type'] == 'KL_Divergence')):
            self._kl_firstfixed = T.mean(
                kl(
                    self._q_valsActTarget,
                    T.ones_like(self._q_valsActTarget) *
                    self.getSettings()['exploration_rate'], self._q_valsActA,
                    T.ones_like(self._q_valsActA) *
                    self.getSettings()['exploration_rate'],
                    self._action_length))
            #self._actor_regularization = (( self._KL_Weight ) * self._kl_firstfixed ) + (10*(self._kl_firstfixed>self.getSettings()['kl_divergence_threshold'])*
            #                                                                         T.square(self._kl_firstfixed-self.getSettings()['kl_divergence_threshold']))
            self._actor_regularization = (self._kl_firstfixed) * (
                self.getSettings()['kl_divergence_threshold'])

            print("Using regularization type : ",
                  self.getSettings()['regularization_type'])
        # SGD update
        # self._updates_ = lasagne.updates.rmsprop(self._loss, self._params, self._learning_rate, self._rho,
        #                                    self._rms_epsilon)
        self._value_grad = T.grad(self._loss + self._critic_regularization,
                                  self._params)
        ## Clipping the max gradient
        """
        for x in range(len(self._value_grad)): 
            self._value_grad[x] = T.clip(self._value_grad[x] ,  -0.1, 0.1)
        """
        if (self.getSettings()['optimizer'] == 'rmsprop'):
            print("Optimizing Value Function with ",
                  self.getSettings()['optimizer'], " method")
            self._updates_ = lasagne.updates.rmsprop(self._value_grad,
                                                     self._params,
                                                     self._learning_rate,
                                                     self._rho,
                                                     self._rms_epsilon)
        elif (self.getSettings()['optimizer'] == 'momentum'):
            print("Optimizing Value Function with ",
                  self.getSettings()['optimizer'], " method")
            self._updates_ = lasagne.updates.momentum(
                self._value_grad,
                self._params,
                self._critic_learning_rate,
                momentum=self._rho)
        elif (self.getSettings()['optimizer'] == 'adam'):
            print("Optimizing Value Function with ",
                  self.getSettings()['optimizer'], " method")
            self._updates_ = lasagne.updates.adam(self._value_grad,
                                                  self._params,
                                                  self._critic_learning_rate,
                                                  beta1=0.9,
                                                  beta2=0.9,
                                                  epsilon=self._rms_epsilon)
        elif (self.getSettings()['optimizer'] == 'adagrad'):
            print("Optimizing Value Function with ",
                  self.getSettings()['optimizer'], " method")
            self._updates_ = lasagne.updates.adagrad(
                self._value_grad,
                self._params,
                self._critic_learning_rate,
                epsilon=self._rms_epsilon)
        else:
            print("Unknown optimization method: ",
                  self.getSettings()['optimizer'])
            sys.exit(-1)
        ## TD update
        """
        if (self.getSettings()['optimizer'] == 'rmsprop'):
            self._updates_ = lasagne.updates.rmsprop(T.mean(self._q_func) + self._critic_regularization, self._params, 
                        self._critic_learning_rate * -T.mean(self._diff), self._rho, self._rms_epsilon)
        elif (self.getSettings()['optimizer'] == 'momentum'):
            self._updates_ = lasagne.updates.momentum(T.mean(self._q_func) + self._critic_regularization, self._params, 
                        self._critic_learning_rate * -T.mean(self._diff), momentum=self._rho)
        elif ( self.getSettings()['optimizer'] == 'adam'):
            self._updates_ = lasagne.updates.adam(T.mean(self._q_func), self._params, 
                        self._critic_learning_rate * -T.mean(self._diff), beta1=0.9, beta2=0.999, epsilon=1e-08)
        else:
            print ("Unknown optimization method: ", self.getSettings()['optimizer'])
            sys.exit(-1)
        """
        ## Need to perform an element wise operation or replicate _diff for this to work properly.
        # self._actDiff = theano.tensor.elemwise.Elemwise(theano.scalar.mul)((self._model.getActionSymbolicVariable() - self._q_valsActA), theano.tensor.tile((self._diff * (1.0/(1.0-self._discount_factor))), self._action_length)) # Target network does not work well here?
        self._actDiff = (self._model.getActionSymbolicVariable() -
                         self._q_valsActA_drop)
        # self._actDiff = ((self._model.getActionSymbolicVariable() - self._q_valsActA)) # Target network does not work well here?
        # self._actDiff_drop = ((self._model.getActionSymbolicVariable() - self._q_valsActA_drop)) # Target network does not work well here?
        ## This should be a single column vector
        # self._actLoss_ = theano.tensor.elemwise.Elemwise(theano.scalar.mul)(( T.transpose(T.sum(T.pow(self._actDiff, 2),axis=1) )), (self._diff * (1.0/(1.0-self._discount_factor))))
        # self._actLoss_ = theano.tensor.elemwise.Elemwise(theano.scalar.mul)(( T.reshape(T.sum(T.pow(self._actDiff, 2),axis=1), (self._batch_size, 1) )),
        #                                                                        (self._tmp_diff * (1.0/(1.0-self._discount_factor)))
        # self._actLoss_ = (T.mean(T.pow(self._actDiff, 2),axis=1))

        self._actLoss_ = theano.tensor.elemwise.Elemwise(theano.scalar.mul)(
            (T.mean(T.pow(self._actDiff, 2), axis=1)), (self._tmp_diff))
        # self._actLoss = T.sum(self._actLoss)/float(self._batch_size)
        self._actLoss = T.mean(self._actLoss_)
        # self._actLoss_drop = (T.sum(0.5 * self._actDiff_drop ** 2)/float(self._batch_size)) # because the number of rows can shrink
        # self._actLoss_drop = (T.mean(0.5 * self._actDiff_drop ** 2))
        self._policy_grad = T.grad(self._actLoss + self._actor_regularization,
                                   self._actionParams)
        ## Clipping the max gradient
        """
        for x in range(len(self._policy_grad)): 
            self._policy_grad[x] = T.clip(self._policy_grad[x] ,  -0.5, 0.5)
        """
        if (self.getSettings()['optimizer'] == 'rmsprop'):
            self._actionUpdates = lasagne.updates.rmsprop(
                self._policy_grad, self._actionParams, self._learning_rate,
                self._rho, self._rms_epsilon)
        elif (self.getSettings()['optimizer'] == 'momentum'):
            self._actionUpdates = lasagne.updates.momentum(self._policy_grad,
                                                           self._actionParams,
                                                           self._learning_rate,
                                                           momentum=self._rho)
        elif (self.getSettings()['optimizer'] == 'adam'):
            self._actionUpdates = lasagne.updates.adam(
                self._policy_grad,
                self._actionParams,
                self._learning_rate,
                beta1=0.9,
                beta2=0.999,
                epsilon=self._rms_epsilon)
        elif (self.getSettings()['optimizer'] == 'adagrad'):
            self._actionUpdates = lasagne.updates.adagrad(
                self._policy_grad,
                self._actionParams,
                self._learning_rate,
                epsilon=self._rms_epsilon)
        else:
            print("Unknown optimization method: ",
                  self.getSettings()['optimizer'])

        # actionUpdates = lasagne.updates.rmsprop(T.mean(self._q_funcAct_drop) +
        #   (self._regularization_weight * lasagne.regularization.regularize_network_params(
        #       self._model.getActorNetwork(), lasagne.regularization.l2)), actionParams,
        #           self._learning_rate * 0.5 * (-T.sum(actDiff_drop)/float(self._batch_size)), self._rho, self._rms_epsilon)
        self._givens_grad = {
            self._model.getStateSymbolicVariable(): self._model.getStates(),
            # self._model.getResultStateSymbolicVariable(): self._model.getResultStates(),
            # self._model.getRewardSymbolicVariable(): self._model.getRewards(),
            # self._model.getActionSymbolicVariable(): self._model.getActions(),
        }

        ### Noisy state updates
        # self._target = (self._model.getRewardSymbolicVariable() + (np.array([self._discount_factor] ,dtype=np.dtype(self.getSettings()['float_type']))[0] * self._q_valsTargetNextState )) * self._NotFallen
        # self._target_dyna = theano.gradient.disconnected_grad(self._q_func)

        ## Bellman error
        self._bellman = self._target - self._q_funcTarget

        # self._target = self._model.getRewardSymbolicVariable() +  (self._discount_factor * self._q_valsTargetNextState )
        ### Give v(s') the next state and v(s) (target) the current state
        self._diff_adv = (self._discount_factor *
                          self._q_func) - (self._q_valsTargetNextState)
        self._diff_adv_givens = {
            self._model.getStateSymbolicVariable():
            self._model.getResultStates(),
            self._model.getResultStateSymbolicVariable():
            self._model.getStates(),
        }

        Distillation.compile(self)
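
The KL-divergence regularization above uses kl and change_penalty helpers defined elsewhere in the codebase. A rough sketch of what they could look like, assuming a diagonal-Gaussian policy and an L2 penalty on parameter drift:

import theano.tensor as T
import lasagne

def kl(mean0, std0, mean1, std1, d):
    # Hypothetical helper: KL divergence between two diagonal Gaussian policies,
    # summed over the action dimensions (`d` is unused because the sum already
    # runs over the last axis).
    return T.sum(T.log(std1 / std0) +
                 (T.square(std0) + T.square(mean0 - mean1)) / (2.0 * T.square(std1)) -
                 0.5, axis=-1)

def change_penalty(network_new, network_old):
    # Hypothetical helper: squared distance between the parameters of two
    # networks, used to keep the new policy close to the previous one.
    params_new = lasagne.layers.helper.get_all_params(network_new)
    params_old = lasagne.layers.helper.get_all_params(network_old)
    return sum(T.sum(T.square(p_new - p_old))
               for p_new, p_old in zip(params_new, params_old))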
Ejemplo n.º 50
0
 def get_output_mask(self, train=False):
     X = self.get_input(train)
     return T.any(T.ones_like(X) * (1. - T.eq(X, self.mask_value)), axis=-1)
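
A small NumPy stand-in for the mask expression above, assuming self.mask_value == 0: a timestep is kept only when at least one of its features differs from the mask value.

import numpy as np

X = np.array([[[1., 2.],
               [0., 0.],
               [3., 0.]]])                       # (batch, time, features)
mask = np.any(np.ones_like(X) * (1. - (X == 0.)), axis=-1)
print(mask)                                      # [[ True False  True]]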
Ejemplo n.º 51
0
    def __init__(self, model, n_in, n_out, state_bounds, action_bounds,
                 reward_bound, settings_):

        super(Distillation,
              self).__init__(model, n_in, n_out, state_bounds, action_bounds,
                             reward_bound, settings_)

        # create a small convolutional neural network

        ### Load expert policy files
        self._expert_policies = []
        file_name_ = ""
        for i in range(len(self.getSettings()['expert_policy_files'])):
            file_name = self.getSettings(
            )['expert_policy_files'][i] + '/' + self.getSettings(
            )['model_type'] + '/' + getAgentName() + '.pkl'
            if (file_name_ == file_name):
                ## To help save memory when experts are the same
                self._expert_policies.append(model_)
            else:
                print("Loading pre compiled network: ", file_name)
                f = open(file_name, 'rb')
                model_ = dill.load(f)
                f.close()
                self._expert_policies.append(
                    model_)  # expert model, load the 2 expert models
            file_name_ = file_name

        self._actor_buffer_states = []
        self._actor_buffer_result_states = []
        self._actor_buffer_actions = []
        self._actor_buffer_rewards = []
        self._actor_buffer_falls = []
        self._actor_buffer_diff = []

        self._NotFallen = T.bcol("Not_Fallen")
        ## because float64 <= float32 * int32, need to use int16 or int8
        self._NotFallen.tag.test_value = np.zeros((self._batch_size, 1),
                                                  dtype=np.dtype('int8'))

        self._NotFallen_shared = theano.shared(np.zeros((self._batch_size, 1),
                                                        dtype='int8'),
                                               broadcastable=(False, True))

        self._tmp_diff = T.col("Tmp_Diff")
        self._tmp_diff.tag.test_value = np.zeros(
            (self._batch_size, 1),
            dtype=np.dtype(self.getSettings()['float_type']))

        self._tmp_diff_shared = theano.shared(
            np.zeros((self._batch_size, 1),
                     dtype=self.getSettings()['float_type']),
            broadcastable=(False, True))  # define a shared variable, initialized to zeros

        self._critic_regularization_weight = self.getSettings(
        )["critic_regularization_weight"]
        self._critic_learning_rate = self.getSettings()["critic_learning_rate"]
        ## Target network
        self._modelTarget = copy.deepcopy(model)  # the target model is the model to be updated

        self._q_valsA = lasagne.layers.get_output(
            self._model.getCriticNetwork(),
            self._model.getStateSymbolicVariable(),
            deterministic=True)  # deterministic state-value output of the original model
        self._q_valsA_drop = lasagne.layers.get_output(
            self._model.getCriticNetwork(),
            self._model.getStateSymbolicVariable(),
            deterministic=False)  # non-deterministic (dropout) state-value output
        self._q_valsNextState = lasagne.layers.get_output(
            self._model.getCriticNetwork(),
            self._model.getResultStateSymbolicVariable(),
            deterministic=True)  # value of the next state
        self._q_valsTargetNextState = lasagne.layers.get_output(
            self._modelTarget.getCriticNetwork(),
            self._model.getResultStateSymbolicVariable(),
            deterministic=True)  # target model's value of the next state
        self._q_valsTarget = lasagne.layers.get_output(
            self._modelTarget.getCriticNetwork(),
            self._model.getStateSymbolicVariable(),
            deterministic=True)  # target model's state value
        self._q_valsTarget_drop = lasagne.layers.get_output(
            self._modelTarget.getCriticNetwork(),
            self._model.getStateSymbolicVariable(),
            deterministic=False)  # target model's state value (with dropout)

        self._q_valsActA = lasagne.layers.get_output(
            self._model.getActorNetwork(),
            self._model.getStateSymbolicVariable(),
            deterministic=True)
        self._q_valsActTarget = lasagne.layers.get_output(
            self._modelTarget.getActorNetwork(),
            self._model.getStateSymbolicVariable(),
            deterministic=True)  #remove the random
        self._q_valsActA_drop = lasagne.layers.get_output(
            self._model.getActorNetwork(),
            self._model.getStateSymbolicVariable(),
            deterministic=False)  # actor value

        self._q_func = self._q_valsA
        self._q_funcTarget = self._q_valsTarget
        self._q_func_drop = self._q_valsA_drop
        self._q_funcTarget_drop = self._q_valsTarget_drop
        self._q_funcAct = self._q_valsActA
        self._q_funcAct_drop = self._q_valsActA_drop

        self._target = self._model.getRewardSymbolicVariable() + (
            self._discount_factor * self._q_valsTargetNextState)
        # self._model.getRewardSymbolicVariable() fetches the reward values; getRewards() = self._rewards_shared, which starts at 0 and is updated continuously
        self._diff = self._target - self._q_func
        self._diff_drop = self._target - self._q_func_drop  # reward target of the updated model minus the original model's critic output
        loss = T.pow(self._diff, 2)
        self._loss = T.mean(loss)  # difference between the rewards of the two models
        self._loss_drop = T.mean(0.5 * self._diff_drop**2)

        self._params = lasagne.layers.helper.get_all_params(
            self._model.getCriticNetwork())
        self._actionParams = lasagne.layers.helper.get_all_params(
            self._model.getActorNetwork())
        self._givens_ = {
            self._model.getStateSymbolicVariable():
            self._model.getStates(),
            self._model.getResultStateSymbolicVariable():
            self._model.getResultStates(),
            self._model.getRewardSymbolicVariable():
            self._model.getRewards()
        }
        self._actGivens = {
            self._model.getStateSymbolicVariable(): self._model.getStates(),
            self._model.getActionSymbolicVariable(): self._model.getActions(),
            self._tmp_diff: self._tmp_diff_shared
        }

        self._critic_regularization = (
            self._critic_regularization_weight *
            lasagne.regularization.regularize_network_params(
                self._model.getCriticNetwork(), lasagne.regularization.l2))
        self._actor_regularization = (
            (self._regularization_weight *
             lasagne.regularization.regularize_network_params(
                 self._model.getActorNetwork(), lasagne.regularization.l2)))
        if (self.getSettings()['use_previous_value_regularization']):
            self._actor_regularization = self._actor_regularization + (
                (self.getSettings()['previous_value_regularization_weight']) *
                change_penalty(self._model.getActorNetwork(),
                               self._modelTarget.getActorNetwork()))
        elif ('regularization_type' in self.getSettings() and
              (self.getSettings()['regularization_type'] == 'KL_Divergence')):
            self._kl_firstfixed = T.mean(
                kl(
                    self._q_valsActTarget,
                    T.ones_like(self._q_valsActTarget) *
                    self.getSettings()['exploration_rate'], self._q_valsActA,
                    T.ones_like(self._q_valsActA) *
                    self.getSettings()['exploration_rate'],
                    self._action_length))
            self._actor_regularization = (self._kl_firstfixed) * (
                self.getSettings()['kl_divergence_threshold'])

            print("Using regularization type : ",
                  self.getSettings()['regularization_type'])
        # SGD update
        self._value_grad = T.grad(self._loss + self._critic_regularization,
                                  self._params)
        if (self.getSettings()['optimizer'] == 'rmsprop'):
            print("Optimizing Value Function with ",
                  self.getSettings()['optimizer'], " method")
            self._updates_ = lasagne.updates.rmsprop(self._value_grad,
                                                     self._params,
                                                     self._learning_rate,
                                                     self._rho,
                                                     self._rms_epsilon)
        elif (self.getSettings()['optimizer'] == 'momentum'):
            print("Optimizing Value Function with ",
                  self.getSettings()['optimizer'], " method")
            self._updates_ = lasagne.updates.momentum(
                self._value_grad,
                self._params,
                self._critic_learning_rate,
                momentum=self._rho)
        elif (self.getSettings()['optimizer'] == 'adam'):
            print("Optimizing Value Function with ",
                  self.getSettings()['optimizer'], " method")
            self._updates_ = lasagne.updates.adam(self._value_grad,
                                                  self._params,
                                                  self._critic_learning_rate,
                                                  beta1=0.9,
                                                  beta2=0.9,
                                                  epsilon=self._rms_epsilon)
        elif (self.getSettings()['optimizer'] == 'adagrad'):
            print("Optimizing Value Function with ",
                  self.getSettings()['optimizer'], " method")
            self._updates_ = lasagne.updates.adagrad(
                self._value_grad,
                self._params,
                self._critic_learning_rate,
                epsilon=self._rms_epsilon)
        else:
            print("Unknown optimization method: ",
                  self.getSettings()['optimizer'])
            sys.exit(-1)
        ## TD update

        ## Need to perform an element wise operation or replicate _diff for this to work properly.
        self._actDiff = (self._model.getActionSymbolicVariable() -
                         self._q_valsActA_drop)  # actor output of the model being updated minus the original model's actor value

        self._actLoss_ = theano.tensor.elemwise.Elemwise(theano.scalar.mul)(
            (T.mean(T.pow(self._actDiff, 2), axis=1)), (self._tmp_diff))
        self._actLoss = T.mean(self._actLoss_)
        self._policy_grad = T.grad(self._actLoss + self._actor_regularization,
                                   self._actionParams)
        ## Clipping the max gradient
        if (self.getSettings()['optimizer'] == 'rmsprop'):
            self._actionUpdates = lasagne.updates.rmsprop(
                self._policy_grad, self._actionParams, self._learning_rate,
                self._rho, self._rms_epsilon)
        elif (self.getSettings()['optimizer'] == 'momentum'):
            self._actionUpdates = lasagne.updates.momentum(self._policy_grad,
                                                           self._actionParams,
                                                           self._learning_rate,
                                                           momentum=self._rho)
        elif (self.getSettings()['optimizer'] == 'adam'):
            self._actionUpdates = lasagne.updates.adam(
                self._policy_grad,
                self._actionParams,
                self._learning_rate,
                beta1=0.9,
                beta2=0.999,
                epsilon=self._rms_epsilon)
        elif (self.getSettings()['optimizer'] == 'adagrad'):
            self._actionUpdates = lasagne.updates.adagrad(
                self._policy_grad,
                self._actionParams,
                self._learning_rate,
                epsilon=self._rms_epsilon)
        else:
            print("Unknown optimization method: ",
                  self.getSettings()['optimizer'])

        self._givens_grad = {
            self._model.getStateSymbolicVariable(): self._model.getStates()
        }

        ## Bellman error
        self._bellman = self._target - self._q_funcTarget

        ### Give v(s') the next state and v(s) (target) the current state
        self._diff_adv = (self._discount_factor * self._q_func) - (
            self._q_valsTargetNextState
        )  # \gamma * critic model output minus the critic model's output at the next state
        self._diff_adv_givens = {
            self._model.getStateSymbolicVariable():
            self._model.getResultStates(),
            self._model.getResultStateSymbolicVariable():
            self._model.getStates(),
        }

        Distillation.compile(self)
Ejemplo n.º 52
0
def train_conv_net(datasets,
                   U,
                   ofile,
                   cv=0,
                   attr=0,
                   img_w=300,
                   filter_hs=[3, 4, 5],
                   hidden_units=[100, 2],
                   dropout_rate=[0.5],
                   shuffle_batch=True,
                   n_epochs=25,
                   batch_size=50,
                   lr_decay=0.95,
                   conv_non_linear="relu",
                   activations=[Iden],
                   sqr_norm_lim=9,
                   non_static=True):
    """
    Train a simple conv net
    img_h = sentence length (padded where necessary)
    img_w = word vector length (300 for word2vec)
    filter_hs = filter window sizes
    hidden_units = [x,y] x is the number of feature maps (per filter window), and y is the penultimate layer
    sqr_norm_lim = s^2 in the paper
    lr_decay = adadelta decay parameter
    """
    rng = np.random.RandomState(3435)
    img_h = len(datasets[0][0][0])
    filter_w = img_w
    feature_maps = hidden_units[0]
    filter_shapes = []
    pool_sizes = []
    for filter_h in filter_hs:
        filter_shapes.append((feature_maps, 1, filter_h, filter_w))
        pool_sizes.append((img_h - filter_h + 1, img_w - filter_w + 1))
    parameters = [("image shape", img_h, img_w),
                  ("filter shape", filter_shapes),
                  ("hidden_units", hidden_units), ("dropout", dropout_rate),
                  ("batch_size", batch_size), ("non_static", non_static),
                  ("learn_decay", lr_decay),
                  ("conv_non_linear", conv_non_linear),
                  ("non_static", non_static), ("sqr_norm_lim", sqr_norm_lim),
                  ("shuffle_batch", shuffle_batch)]
    print(parameters)

    # define model architecture
    index = T.lscalar()
    x = T.tensor3('x', dtype=theano.config.floatX)
    y = T.ivector('y')
    mair = T.dmatrix('mair')
    Words = theano.shared(value=U, name="Words")
    zero_vec_tensor = T.vector(dtype=theano.config.floatX)
    zero_vec = np.zeros(img_w, dtype=theano.config.floatX)
    set_zero = theano.function([zero_vec_tensor],
                               updates=[
                                   (Words,
                                    T.set_subtensor(Words[0, :],
                                                    zero_vec_tensor))
                               ],
                               allow_input_downcast=True)

    conv_layers = []

    for i in range(len(filter_hs)):
        filter_shape = filter_shapes[i]
        pool_size = pool_sizes[i]
        conv_layer = LeNetConvPoolLayer(rng,
                                        image_shape=None,
                                        filter_shape=filter_shape,
                                        poolsize=pool_size,
                                        non_linear=conv_non_linear)
        conv_layers.append(conv_layer)

    layer0_input = Words[T.cast(x.flatten(), dtype="int32")].reshape(
        (x.shape[0], x.shape[1], x.shape[2], Words.shape[1]))

    def convolve_user_statuses(statuses):
        layer1_inputs = []

        def sum_mat(mat, out):
            z = ifelse(
                T.neq(T.sum(mat, dtype=theano.config.floatX),
                      T.constant(0, dtype=theano.config.floatX)),
                T.constant(1, dtype=theano.config.floatX),
                T.constant(0, dtype=theano.config.floatX))
            return out + z, theano.scan_module.until(
                T.eq(z, T.constant(0, dtype=theano.config.floatX)))

        status_count, _ = theano.scan(fn=sum_mat,
                                      sequences=statuses,
                                      outputs_info=T.constant(
                                          0, dtype=theano.config.floatX))

        # Slice-out dummy (zeroed) sentences
        relv_input = statuses[:T.cast(status_count[-1], dtype='int32'
                                      )].dimshuffle(0, 'x', 1, 2)

        for conv_layer in conv_layers:
            layer1_inputs.append(
                conv_layer.set_input(input=relv_input).flatten(2))

        features = T.concatenate(layer1_inputs, axis=1)

        avg_feat = T.max(features, axis=0)

        return avg_feat

    conv_feats, _ = theano.scan(fn=convolve_user_statuses,
                                sequences=layer0_input)

    # Add Mairesse features
    layer1_input = T.concatenate([conv_feats, mair], axis=1)  ##mairesse_change
    hidden_units[0] = feature_maps * len(filter_hs) + datasets[4].shape[
        1]  ##mairesse_change
    classifier = MLPDropout(rng,
                            input=layer1_input,
                            layer_sizes=hidden_units,
                            activations=activations,
                            dropout_rates=dropout_rate)

    svm_data = T.concatenate(
        [classifier.layers[0].output,
         y.dimshuffle(0, 'x')], axis=1)
    # define parameters of the model and update functions using adadelta
    params = classifier.params
    for conv_layer in conv_layers:
        params += conv_layer.params
    if non_static:
        # if word vectors are allowed to change, add them as model parameters
        params += [Words]
    cost = classifier.negative_log_likelihood(y)
    dropout_cost = classifier.dropout_negative_log_likelihood(y)
    grad_updates = sgd_updates_adadelta(params, dropout_cost, lr_decay, 1e-6,
                                        sqr_norm_lim)

    # Shuffle the dataset and assign it to minibatches. If the dataset size is not a multiple of the batch size,
    # replicate extra data (chosen at random).
    np.random.seed(3435)
    if datasets[0].shape[0] % batch_size > 0:
        extra_data_num = batch_size - datasets[0].shape[0] % batch_size
        rand_perm = np.random.permutation(range(len(datasets[0])))
        train_set_x = datasets[0][rand_perm]
        train_set_y = datasets[1][rand_perm]
        train_set_m = datasets[4][rand_perm]
        extra_data_x = train_set_x[:extra_data_num]
        extra_data_y = train_set_y[:extra_data_num]
        extra_data_m = train_set_m[:extra_data_num]
        new_data_x = np.append(datasets[0], extra_data_x, axis=0)
        new_data_y = np.append(datasets[1], extra_data_y, axis=0)
        new_data_m = np.append(datasets[4], extra_data_m, axis=0)
    else:
        new_data_x = datasets[0]
        new_data_y = datasets[1]
        new_data_m = datasets[4]
    rand_perm = np.random.permutation(range(len(new_data_x)))
    new_data_x = new_data_x[rand_perm]
    new_data_y = new_data_y[rand_perm]
    new_data_m = new_data_m[rand_perm]
    n_batches = new_data_x.shape[0] / batch_size
    n_train_batches = int(np.round(n_batches * 0.9))
    # divide train set into train/val sets
    test_set_x = datasets[2]
    test_set_y = np.asarray(datasets[3], "int32")
    test_set_m = datasets[5]
    train_set_x, train_set_y, train_set_m = shared_dataset(
        (new_data_x[:n_train_batches * batch_size],
         new_data_y[:n_train_batches * batch_size],
         new_data_m[:n_train_batches * batch_size]))
    val_set_x, val_set_y, val_set_m = shared_dataset(
        (new_data_x[n_train_batches * batch_size:],
         new_data_y[n_train_batches * batch_size:],
         new_data_m[n_train_batches * batch_size:]))
    n_val_batches = n_batches - n_train_batches
    val_model = theano.function(
        [index],
        classifier.errors(y),
        givens={
            x: val_set_x[index * batch_size:(index + 1) * batch_size],
            y: val_set_y[index * batch_size:(index + 1) * batch_size],
            mair: val_set_m[index * batch_size:(index + 1) * batch_size]
        },  ##mairesse_change
        allow_input_downcast=False)

    # compile theano functions to get train/val/test errors
    test_model = theano.function(
        [index],
        [classifier.errors(y), svm_data],
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size],
            mair: train_set_m[index * batch_size:(index + 1) * batch_size]
        },
        ##mairesse_change
        allow_input_downcast=True)
    train_model = theano.function(
        [index],
        cost,
        updates=grad_updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size],
            mair: train_set_m[index * batch_size:(index + 1) * batch_size]
        },
        ##mairesse_change
        allow_input_downcast=True)

    test_y_pred = classifier.predict(layer1_input)
    test_error = T.sum(T.neq(test_y_pred, y), dtype=theano.config.floatX)
    true_p = T.sum(test_y_pred * y, dtype=theano.config.floatX)
    false_p = T.sum(test_y_pred *
                    T.mod(y + T.ones_like(y, dtype=theano.config.floatX),
                          T.constant(2, dtype='int32')))
    false_n = T.sum(
        y * T.mod(test_y_pred + T.ones_like(y, dtype=theano.config.floatX),
                  T.constant(2, dtype='int32')))
    test_model_all = theano.function(
        [
            x,
            y,
            mair  ##mairesse_change
        ],
        [test_error, true_p, false_p, false_n, svm_data],
        allow_input_downcast=True)

    test_batches = test_set_x.shape[0] / batch_size

    # start training over mini-batches
    print('... training')
    epoch = 0
    best_val_perf = 0
    val_perf = 0
    test_perf = 0
    fscore = 0
    cost_epoch = 0
    while (epoch < n_epochs):
        start_time = time.time()
        epoch = epoch + 1
        if shuffle_batch:
            for minibatch_index in np.random.permutation(
                    range(n_train_batches)):
                cost_epoch = train_model(minibatch_index)
                set_zero(zero_vec)
        else:
            for minibatch_index in range(n_train_batches):
                cost_epoch = train_model(minibatch_index)
                set_zero(zero_vec)
        train_losses = [test_model(i) for i in range(n_train_batches)]
        train_perf = 1 - np.mean([loss[0] for loss in train_losses])
        val_losses = [val_model(i) for i in range(n_val_batches)]
        val_perf = 1 - np.mean(val_losses)
        epoch_perf = 'epoch: %i, training time: %.2f secs, train perf: %.2f %%, val perf: %.2f %%' % (
            epoch, time.time() - start_time, train_perf * 100.,
            val_perf * 100.)
        print(epoch_perf)
        ofile.write(epoch_perf + "\n")
        ofile.flush()
        if val_perf >= best_val_perf:
            best_val_perf = val_perf
            test_loss_list = [
                test_model_all(
                    test_set_x[idx * batch_size:(idx + 1) * batch_size],
                    test_set_y[idx * batch_size:(idx + 1) * batch_size],
                    test_set_m[idx * batch_size:(idx + 1) *
                               batch_size]  ##mairesse_change
                ) for idx in range(test_batches)
            ]
            if test_set_x.shape[0] > test_batches * batch_size:
                test_loss_list.append(
                    test_model_all(
                        test_set_x[test_batches * batch_size:],
                        test_set_y[test_batches * batch_size:],
                        test_set_m[test_batches *
                                   batch_size:]  ##mairesse_change
                    ))
            test_loss_list_temp = test_loss_list
            test_loss_list = np.asarray([t[:-1] for t in test_loss_list])
            test_loss = np.sum(test_loss_list[:, 0]) / float(
                test_set_x.shape[0])
            test_perf = 1 - test_loss
            tp = np.sum(test_loss_list[:, 1])
            fp = np.sum(test_loss_list[:, 2])
            fn = np.sum(test_loss_list[:, 3])
            tn = test_set_x.shape[0] - (tp + fp + fn)
            fscore = np.mean([
                2 * tp / float(2 * tp + fp + fn),
                2 * tn / float(2 * tn + fp + fn)
            ])
            svm_test = np.concatenate([t[-1] for t in test_loss_list_temp],
                                      axis=0)
            svm_train = np.concatenate([t[1] for t in train_losses], axis=0)
            output = "Test result: accu: " + str(
                test_perf) + ", macro_fscore: " + str(fscore) + "\ntp: " + str(
                    tp) + " tn:" + str(tn) + " fp: " + str(fp) + " fn: " + str(
                        fn)
            print(output)
            ofile.write(output + "\n")
            ofile.flush()
            # dump train and test features
            cPickle.dump(svm_test,
                         open("cvte" + str(attr) + str(cv) + ".p", "wb"))
            cPickle.dump(svm_train,
                         open("cvtr" + str(attr) + str(cv) + ".p", "wb"))
        updated_epochs = refresh_epochs()
        if updated_epochs is not None and n_epochs != updated_epochs:
            n_epochs = updated_epochs
            print('Epochs updated to ' + str(n_epochs))
    return test_perf, fscore
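
train_conv_net calls a shared_dataset helper that is not included in this excerpt. A minimal sketch of it, following the usual Theano convention of moving the arrays into shared variables so minibatches can be sliced inside givens (the exact dtypes and the int32 cast for the labels are assumptions):

import numpy as np
import theano
import theano.tensor as T

def shared_dataset(data_xym, borrow=True):
    # Hypothetical helper: wrap the (x, y, mairesse) arrays in shared variables.
    data_x, data_y, data_m = data_xym
    shared_x = theano.shared(np.asarray(data_x, dtype=theano.config.floatX), borrow=borrow)
    shared_y = theano.shared(np.asarray(data_y, dtype=theano.config.floatX), borrow=borrow)
    # `mair` is declared as a dmatrix above, so keep the Mairesse features as float64
    shared_m = theano.shared(np.asarray(data_m, dtype='float64'), borrow=borrow)
    # labels are consumed as integer class indices
    return shared_x, T.cast(shared_y, 'int32'), shared_m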
Ejemplo n.º 53
0
    def build_model(self):
        views_curr = T.tensor4('views')
        action_hists_curr = T.tensor4('action_hists')
        actions = T.icol('actions')
        views_next = T.tensor4('next_views')
        action_hists_next = T.tensor4('next_action_hists')
        rewards = T.col('rewards')
        terminals = T.icol('terminals')

        # initialize network(s) for computing q-values
        net_online_in_view, net_online_in_action_hist, self.net_online_out, self.all_layers = \
         self.build_network(self.network_builder, self.view_size, self.action_hist_size)
        net_online_in_curr = {net_online_in_view: views_curr, net_online_in_action_hist: action_hists_curr} \
         if self.action_hist_size.w > 0 else {net_online_in_view: views_curr}
        q_vals_online_curr_train = lasagne.layers.get_output(
            self.net_online_out, net_online_in_curr, deterministic=False)
        q_vals_online_curr_test = lasagne.layers.get_output(
            self.net_online_out, net_online_in_curr, deterministic=True)
        # for predictions we always use the q-values estimated by the online network on the current state
        q_vals_pred_train = q_vals_online_curr_train
        q_vals_pred_test = q_vals_online_curr_test
        if self.clone_interval > 0:
            net_target_in_view, net_target_in_action_hist, self.net_target_out, _ = \
             self.build_network(self.network_builder, self.view_size, self.action_hist_size)
            self._clone()
            net_target_in_next = {net_target_in_view: views_next, net_target_in_action_hist: action_hists_next} \
             if self.action_hist_size.w > 0 else {net_target_in_view: views_next}
            # predict q-values for next state with target network
            q_vals_target_next = lasagne.layers.get_output(
                self.net_target_out, net_target_in_next)
            if self.double_q:
                # Double Q-Learning:
                # use online network to choose best action on next state (q_vals_target_argmax)...
                net_online_in_next = {net_online_in_view: views_next, net_online_in_action_hist: action_hists_next} \
                 if self.action_hist_size.w > 0 else {net_online_in_view: views_next}
                q_vals_online_next = lasagne.layers.get_output(
                    self.net_online_out, net_online_in_next)
                q_vals_target_argmax = T.argmax(q_vals_online_next,
                                                axis=1,
                                                keepdims=False)
                # ...but use target network to estimate q-values for these actions
                q_vals_target = T.diagonal(
                    T.take(q_vals_target_next, q_vals_target_argmax,
                           axis=1)).reshape((-1, 1))
            else:
                q_vals_target = T.max(q_vals_target_next,
                                      axis=1,
                                      keepdims=True)
        else:
            net_target_in_next = {net_online_in_view: views_next, net_online_in_action_hist: action_hists_next} \
             if self.action_hist_size.w > 0 else {net_online_in_view: views_next}
            q_vals_online_next = lasagne.layers.get_output(
                self.net_online_out, net_target_in_next)
            q_vals_target = T.max(q_vals_online_next, axis=1, keepdims=True)
        # define loss computation
        actionmask = T.eq(
            T.arange(len(self.actions)).reshape((1, -1)),
            actions.reshape((-1, 1))).astype(theano.config.floatX)
        terminals_float = terminals.astype(theano.config.floatX)
        target = rewards + \
           (T.ones_like(terminals_float) - terminals_float) * \
           self.discount * q_vals_target
        output = (q_vals_pred_train * actionmask).sum(axis=1).reshape((-1, 1))
        diff = target - output
        if self.clip_delta > 0:
            # see https://github.com/spragunr/deep_q_rl/blob/master/deep_q_rl/q_network.py
            quadratic_part = T.minimum(abs(diff), self.clip_delta)
            linear_part = abs(diff) - quadratic_part
            loss = quadratic_part**2 + self.clip_delta * linear_part
        else:
            loss = diff**2

        # regularization
        if self.all_layers is not None and self.regularization > 0:
            l2reg = 0
            for lll in self.all_layers:
                l2reg += regularize_layer_params(lll, l2) * self.regularization
            loss = T.mean(loss) + l2reg  # batch accumulator sum or mean
        else:
            loss = T.mean(loss)

        # define network update for training
        params = lasagne.layers.helper.get_all_params(self.net_online_out,
                                                      trainable=True)
        updates = self.optimizer(loss, params)
        train_givens = self.shared_batch.givens(views_curr, action_hists_curr,
                                                actions, views_next,
                                                action_hists_next, rewards,
                                                terminals)
        self.train_fn = theano.function([], [loss],
                                        updates=updates,
                                        givens=train_givens)

        # define output prediction
        predict_givens = self.shared_state.givens(views_curr,
                                                  action_hists_curr)
        self.predict_fn = theano.function([],
                                          q_vals_pred_test[0],
                                          givens=predict_givens)
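
For the Double Q-Learning branch above, the target network's q-values are gathered at the actions selected by the online network via T.diagonal(T.take(...)). A quick NumPy check of that indexing trick:

import numpy as np

q_online_next = np.array([[1., 3., 2.],
                          [0., 1., 5.]])
q_target_next = np.array([[10., 20., 30.],
                          [40., 50., 60.]])
best_actions = q_online_next.argmax(axis=1)      # online net picks actions [1, 2]
gathered = np.diagonal(np.take(q_target_next, best_actions, axis=1)).reshape(-1, 1)
print(gathered)                                  # [[20.], [60.]] -> row i, action best_actions[i]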
Ejemplo n.º 54
0
 def get_output_mask(self, train=None):
     X = self.get_input(train)
     if not self.mask_zero:
         return None
     else:
         return T.ones_like(X) * (1 - T.eq(X, 0))
Ejemplo n.º 55
0
import numpy as np
import theano
import theano.tensor as T
import lasagne

# Placeholder sizes and symbolic inputs assumed for this excerpt: the original
# snippet starts mid-script, so the values below are illustrative only.
mbsize, channel, height, width = 32, 4, 84, 84
num_action = 4
gamma = 0.99
input_var = T.tensor4('input')
action_var = T.icol('action')
terminal_var = T.col(dtype='int8')

network = lasagne.layers.InputLayer((mbsize, channel, height, width),
                                    input_var)
network = lasagne.layers.Conv2DLayer(network,
                                     num_filters=3,
                                     filter_size=(2, 2),
                                     stride=(1, 1))
network = lasagne.layers.DenseLayer(network, num_units=10)
network = lasagne.layers.DenseLayer(network,
                                    num_units=num_action,
                                    nonlinearity=None)
network_out = lasagne.layers.get_output(network)
network_indexing = network_out[T.arange(mbsize), action_var.reshape((-1, ))]
network_max = T.max(network_out, axis=1).reshape((-1, 1))
network_discount = gamma * network_max * (T.ones_like(terminal_var) -
                                          terminal_var)

f = theano.function([input_var], network_out)
g = theano.function([input_var, action_var], network_indexing)
h = theano.function([input_var], network_max)
j = theano.function([input_var, terminal_var], network_discount)

inp = np.uint8(np.random.randint(0, 256, (mbsize, channel, height, width)))
print("inp.shape", inp.shape)

act = np.uint8(np.random.randint(0, num_action, (mbsize, 1)))
print(act.reshape(-1, ))

out = f(inp)
print("out", out)
Ejemplo n.º 56
0
import theano
import theano.tensor as T

k = T.iscalar("k")
A = T.vector("A")

# Symbolic description of the result
result, updates = theano.scan(fn=lambda prior_result, A: prior_result * A,
                              outputs_info=T.ones_like(A),
                              non_sequences=A,
                              n_steps=k)

# Optimization saving memory.
final_result = result[-1]

# Compiled function that returns A**k
power = theano.function(inputs=[A, k], outputs=final_result, updates=updates)

print(power(range(10), 2))
print(power(range(10), 4))

Ejemplo n.º 57
0
    def __init__(self, input_width, input_height, num_actions,
                 num_frames, discount, learning_rate, rho,
                 rms_epsilon, momentum, clip_delta, freeze_interval,
                 batch_size, network_type, update_rule, lambda_reg,
                 batch_accumulator, pretrained_net, rng, input_scale=255.0):

        self.input_width = input_width
        self.input_height = input_height
        self.num_actions = num_actions
        self.num_frames = num_frames
        self.batch_size = batch_size
        self.discount = discount
        self.rho = rho
        self.lr = learning_rate
        self.rms_epsilon = rms_epsilon
        self.momentum = momentum
        self.clip_delta = clip_delta
        self.freeze_interval = freeze_interval
        self.rng = rng
        self.lambda_reg = lambda_reg

        lasagne.random.set_rng(self.rng)

        self.update_counter = 0

        self.l_in, self.l_act_in, self.l_out, self.pred_z, self.true_z = \
                                        self.build_network(network_type, \
                                        input_width, input_height, num_actions,\
                                        num_frames, batch_size)

        if self.freeze_interval > 0:
            self.next_l_in, self.next_l_act_in, self.next_l_out, _d, _d = \
                                self.build_network(network_type, input_width, \
                                input_height, num_actions, num_frames, batch_size)
            self.reset_q_hat()

        states = T.tensor4('states')
        next_states = T.tensor4('next_states')
        rewards = T.col('rewards')
        actions = T.imatrix('actions')
        terminals = T.icol('terminals')

        # Shared variables for training from a minibatch of replayed
        # state transitions, each consisting of num_frames * 2 + 1 (due to
        # overlap) images, along with the chosen action and resulting
        # reward and terminal status.
        self.imgs_shared = theano.shared(
            np.zeros((batch_size, num_frames*2+1, input_height, input_width),
                     dtype=theano.config.floatX))
        self.rewards_shared = theano.shared(
            np.zeros((batch_size, 1), dtype=theano.config.floatX),
            broadcastable=(False, True))
        self.actions_shared = theano.shared(
            np.zeros((batch_size, num_frames), dtype='int32')
            )
        self.terminals_shared = theano.shared(
            np.zeros((batch_size, 1), dtype='int32'),
            broadcastable=(False, True))

        # Shared variable for a single state, to calculate q_vals.
        self.state_shared = theano.shared(
            np.zeros((num_frames*2, input_height, input_width),
                     dtype=theano.config.floatX))

        q_vals, z_pred, z_true = lasagne.layers.get_output(
                                    [self.l_out, self.pred_z, self.true_z],
                                    inputs = {self.l_in: states / input_scale,
                                        self.l_act_in: actions}
                                )
        
        if self.freeze_interval > 0:
            next_q_vals = lasagne.layers.get_output(
                                    self.next_l_out,
                                    {self.next_l_in: next_states / input_scale, 
                                     self.next_l_act_in: actions}
                                    )
        else:
            next_q_vals = lasagne.layers.get_output(
                                    self.l_out,
                                    {self.l_in: next_states / input_scale, 
                                     self.l_act_in: actions}
                                    )
            next_q_vals = theano.gradient.disconnected_grad(next_q_vals)

        terminalsX = terminals.astype(theano.config.floatX)
        actionmask = T.eq(T.arange(num_actions).reshape((1, -1)),
                actions[:, 0].reshape((-1, 1))).astype(theano.config.floatX)

        target = (rewards +
                  (T.ones_like(terminalsX) - terminalsX) *
                  self.discount * T.max(next_q_vals, axis=1, keepdims=True))
        output = (q_vals * actionmask).sum(axis=1).reshape((-1, 1))
        diff = target - output
        diff_reg = z_true - z_pred

        if self.clip_delta > 0:
            # If we simply take the squared clipped diff as our loss,
            # then the gradient will be zero whenever the diff exceeds
            # the clip bounds. To avoid this, we extend the loss
            # linearly past the clip point to keep the gradient constant
            # in that regime.
            # 
            # This is equivalent to declaring d loss/d q_vals to be
            # equal to the clipped diff, then backpropagating from
            # there, which is what the DeepMind implementation does.
            quadratic_part = T.minimum(abs(diff), self.clip_delta)
            linear_part = abs(diff) - quadratic_part
            loss = 0.5 * quadratic_part ** 2 + self.clip_delta * linear_part
        else:
            loss = 0.5 * diff ** 2

        loss = loss + 0.5 * self.lambda_reg * (diff_reg ** 2).sum(axis=1)

        if batch_accumulator == 'sum':
            loss = T.sum(loss)
        elif batch_accumulator == 'mean':
            loss = T.mean(loss)
        else:
            raise ValueError("Bad accumulator: {}".format(batch_accumulator))

        params = lasagne.layers.helper.get_all_params([self.l_out, self.pred_z, self.true_z])  
        train_givens = {
            states: self.imgs_shared[:, :-1],
            next_states: self.imgs_shared[:, 1:],
            rewards: self.rewards_shared,
            actions: self.actions_shared,
            terminals: self.terminals_shared
        }

        if update_rule == 'deepmind_rmsprop':
            updates = deepmind_rmsprop(loss, params, self.lr, self.rho,
                                       self.rms_epsilon)
        elif update_rule == 'rmsprop':
            updates = lasagne.updates.rmsprop(loss, params, self.lr, self.rho,
                                              self.rms_epsilon)
        elif update_rule == 'sgd':
            updates = lasagne.updates.sgd(loss, params, self.lr)
        else:
            raise ValueError("Unrecognized update: {}".format(update_rule))

        if self.momentum > 0:
            updates = lasagne.updates.apply_momentum(updates, None,
                                                     self.momentum)

        self._train = theano.function([], [loss], updates=updates,
                                      givens=train_givens)
        q_givens = {
            states: self.state_shared.reshape((1,
                                               self.num_frames*2,
                                               self.input_height,
                                               self.input_width))
        }
        self._q_vals = theano.function([], q_vals[0], givens=q_givens)
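
    # Illustrative sketch (not part of the original snippet): classes built
    # this way are typically driven by copying a minibatch into the shared
    # variables and then calling the compiled Theano functions; the method
    # names below are assumptions.
    def train(self, imgs, actions, rewards, terminals):
        self.imgs_shared.set_value(imgs)
        self.actions_shared.set_value(actions)
        self.rewards_shared.set_value(rewards)
        self.terminals_shared.set_value(terminals)
        if (self.freeze_interval > 0 and
                self.update_counter % self.freeze_interval == 0):
            self.reset_q_hat()
        loss, = self._train()
        self.update_counter += 1
        return loss

    def q_vals(self, state):
        self.state_shared.set_value(state)
        return self._q_vals()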
Ejemplo n.º 58
0
    def __init__(self,
                 input_width,
                 input_height,
                 num_actions,
                 num_frames,
                 discount,
                 learning_rate,
                 rho,
                 rms_epsilon,
                 momentum,
                 clip_delta,
                 freeze_interval,
                 batch_size,
                 network_type,
                 update_rule,
                 batch_accumulator,
                 rng,
                 action_selection,
                 input_scale=255.0):

        self.input_width = input_width
        self.input_height = input_height
        self.num_actions = num_actions
        self.num_frames = num_frames
        self.batch_size = batch_size
        self.discount = discount
        self.rho = rho
        self.lr = learning_rate
        self.rms_epsilon = rms_epsilon
        self.momentum = momentum
        self.clip_delta = clip_delta
        self.freeze_interval = freeze_interval
        self.rng = rng
        self.max_compression_loss = 0

        if action_selection == 'epsilon-greedy':
            self.choose_action = self.choose_action_epsilon_greedy
        elif action_selection == 'softmax':
            self.choose_action = self.choose_action_softmax
        else:
            raise ValueError(
                "Unrecognized action selection: {}".format(action_selection))

        lasagne.random.set_rng(self.rng)

        self.update_counter = 0

        self.l_out, self.original, self.reconstructed = self.build_network(
            network_type, input_width, input_height, num_actions, num_frames,
            batch_size)
        if self.freeze_interval > 0:
            self.next_l_out, _, _ = self.build_network(network_type,
                                                       input_width,
                                                       input_height,
                                                       num_actions, num_frames,
                                                       batch_size)
            self.reset_q_hat()

        states = T.tensor4('states')
        next_states = T.tensor4('next_states')
        rewards = T.col('rewards')
        actions = T.icol('actions')
        terminals = T.icol('terminals')

        self.states_shared = theano.shared(
            np.zeros((batch_size, num_frames, input_height, input_width),
                     dtype=theano.config.floatX))

        self.next_states_shared = theano.shared(
            np.zeros((batch_size, num_frames, input_height, input_width),
                     dtype=theano.config.floatX))

        self.rewards_shared = theano.shared(np.zeros(
            (batch_size, 1), dtype=theano.config.floatX),
                                            broadcastable=(False, True))

        self.actions_shared = theano.shared(np.zeros((batch_size, 1),
                                                     dtype='int32'),
                                            broadcastable=(False, True))

        self.terminals_shared = theano.shared(np.zeros((batch_size, 1),
                                                       dtype='int32'),
                                              broadcastable=(False, True))

        q_vals = lasagne.layers.get_output(self.l_out, states / input_scale)

        if self.freeze_interval > 0:
            next_q_vals = lasagne.layers.get_output(self.next_l_out,
                                                    next_states / input_scale)
        else:
            next_q_vals = lasagne.layers.get_output(self.l_out,
                                                    next_states / input_scale)
            next_q_vals = theano.gradient.disconnected_grad(next_q_vals)

        original_vals = lasagne.layers.get_output(self.original,
                                                  states / input_scale)
        reconstructed_vals = lasagne.layers.get_output(self.reconstructed,
                                                       states / input_scale)
        compression_loss = T.mean(0.5 *
                                  (original_vals - reconstructed_vals)**2)

        target = (rewards + compression_loss +
                  (T.ones_like(terminals) - terminals) * self.discount *
                  T.max(next_q_vals, axis=1, keepdims=True))
        diff = target - q_vals[T.arange(batch_size),
                               actions.reshape((-1, ))].reshape((-1, 1))

        if self.clip_delta > 0:
            # If we simply take the squared clipped diff as our loss,
            # then the gradient will be zero whenever the diff exceeds
            # the clip bounds. To avoid this, we extend the loss
            # linearly past the clip point to keep the gradient constant
            # in that regime.
            #
            # This is equivalent to declaring d loss/d q_vals to be
            # equal to the clipped diff, then backpropagating from
            # there, which is what the DeepMind implementation does.
            quadratic_part = T.minimum(abs(diff), self.clip_delta)
            linear_part = abs(diff) - quadratic_part
            loss = 0.5 * quadratic_part**2 + self.clip_delta * linear_part
        else:
            loss = 0.5 * diff**2

        if batch_accumulator == 'sum':
            loss = T.sum(loss)
        elif batch_accumulator == 'mean':
            loss = T.mean(loss)
        else:
            raise ValueError("Bad accumulator: {}".format(batch_accumulator))

        params = lasagne.layers.helper.get_all_params(self.l_out)
        compression_params = lasagne.layers.helper.get_all_params(
            self.reconstructed)
        givens = {
            states: self.states_shared,
            next_states: self.next_states_shared,
            rewards: self.rewards_shared,
            actions: self.actions_shared,
            terminals: self.terminals_shared
        }
        if update_rule == 'deepmind_rmsprop':
            updates = deepmind_rmsprop(loss, params, self.lr, self.rho,
                                       self.rms_epsilon)
        elif update_rule == 'rmsprop':
            updates = lasagne.updates.rmsprop(loss, params, self.lr, self.rho,
                                              self.rms_epsilon)
        elif update_rule == 'sgd':
            updates = lasagne.updates.sgd(loss, params, self.lr)
        else:
            raise ValueError("Unrecognized update: {}".format(update_rule))

        if self.momentum > 0:
            updates = lasagne.updates.apply_momentum(updates, None,
                                                     self.momentum)

        compression_updates = lasagne.updates.rmsprop(compression_loss,
                                                      compression_params,
                                                      self.lr, self.rho,
                                                      self.rms_epsilon)
        updates += compression_updates

        self._train = theano.function([], [loss, compression_loss, q_vals],
                                      updates=updates,
                                      givens=givens)

        self._q_vals = theano.function([],
                                       q_vals,
                                       givens={states: self.states_shared})

        self._compression_loss = theano.function(
            [], compression_loss, givens={states: self.states_shared})
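
    # Hedged sketch (not shown in this snippet) of reset_q_hat(), which is
    # called above whenever freeze_interval > 0: copy the online network's
    # parameters into the frozen target network.
    def reset_q_hat(self):
        all_params = lasagne.layers.helper.get_all_param_values(self.l_out)
        lasagne.layers.helper.set_all_param_values(self.next_l_out, all_params)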
Ejemplo n.º 59
0
    def __init__(self, **option):

        # source and target embedding dim
        sedim, tedim = option["embdim"]
        # source, target and attention hidden dim
        shdim, thdim, ahdim = option["hidden"]
        # maxout hidden dim
        maxdim = option["maxhid"]
        # maxout part
        maxpart = option["maxpart"]
        # deepout hidden dim
        deephid = option["deephid"]
        svocab, tvocab = option["vocabulary"]
        sw2id, sid2w = svocab
        tw2id, tid2w = tvocab
        # source and target vocabulary size
        svsize, tvsize = len(sid2w), len(tid2w)

        if "scope" not in option or option["scope"] is None:
            option["scope"] = "proteinsearch"

        if "initializer" not in option:
            option["initializer"] = None

        if "regularizer" not in option:
            option["regularizer"] = None

        if "keep_prob" not in option:
            option["keep_prob"] = 1.0

        dtype = theano.config.floatX
        initializer = option["initializer"]
        regularizer = option["regularizer"]
        keep_prob = option["keep_prob"] or 1.0

        scope = option["scope"]
        decoder_scope = "decoder2"

        encoder = Encoder(sedim, shdim)
        from . import decoder2
        decoder = decoder2.DecoderGruCond(2,
                                          option['method'],
                                          tedim,
                                          thdim,
                                          ahdim,
                                          2 * shdim + thdim,
                                          dim_readout=deephid,
                                          n_y_vocab=tvsize)

        # training graph
        with ops.variable_scope(scope,
                                initializer=initializer,
                                regularizer=regularizer,
                                dtype=dtype):
            src_seq = T.imatrix("source_sequence")
            src_mask = T.matrix("source_sequence_mask")
            tgt_seq = T.imatrix("target_sequence")
            tgt_mask = T.matrix("target_sequence_mask")
            byseq = T.imatrix("backward_target_sequence")

            with ops.variable_scope("source_embedding"):
                source_embedding = ops.get_variable("embedding",
                                                    [svsize, sedim])
                source_bias = ops.get_variable("bias", [sedim])

            with ops.variable_scope("target_embedding"):
                target_embedding = ops.get_variable("embedding",
                                                    [tvsize, tedim])
                target_bias = ops.get_variable("bias", [tedim])

            source_inputs = nn.embedding_lookup(source_embedding,
                                                src_seq) + source_bias
            target_inputs = nn.embedding_lookup(target_embedding,
                                                tgt_seq) + target_bias
            by_inputs = nn.embedding_lookup(target_embedding,
                                            byseq) + target_bias

            if keep_prob < 1.0:
                source_inputs = nn.dropout(source_inputs, keep_prob=keep_prob)
                target_inputs = nn.dropout(target_inputs, keep_prob=keep_prob)
                by_inputs = nn.dropout(by_inputs, keep_prob=keep_prob)

            states, r_states = encoder.forward(source_inputs, src_mask)
            annotation = T.concatenate([states, r_states], 2)

            annotation = nn.dropout(annotation, keep_prob=keep_prob)

            from . import softdec
            soft_decoder = softdec.SoftDecoder(option["eosid"],
                                               option["softk"],
                                               tedim,
                                               thdim,
                                               ahdim,
                                               2 * shdim,
                                               dim_readout=deephid,
                                               n_y_vocab=tvsize)
            with ops.variable_scope('soft_decoder'):
                initial_state = nn.feedforward(states[-1], [shdim, thdim],
                                               True,
                                               scope='initial',
                                               activation=T.tanh)
                mapped_keys = map_key(annotation, 2 * shdim, ahdim)
                soft_states, _, _, soft_mask = soft_decoder.infer(
                    mapped_keys, src_mask, annotation, initial_state,
                    target_embedding, target_bias, keep_prob)

            with ops.variable_scope('soft_decoder', reuse=True):
                _, _, soft_cost, _ = soft_decoder.forward(
                    byseq, by_inputs, tgt_mask, mapped_keys, src_mask,
                    annotation, initial_state, keep_prob)

            # compute initial state for decoder
            # first state of backward encoder
            # initialize with only encoder state
            final_state = r_states[0]

            with ops.variable_scope(decoder_scope):
                initial_state = nn.feedforward(final_state, [shdim, thdim],
                                               True,
                                               scope="initial",
                                               activation=T.tanh)
                # keys for query
                with ops.variable_scope('map-key-src'):
                    mapped_keys_src = map_key(annotation, 2 * shdim, ahdim)
                with ops.variable_scope('map-key-soft'):
                    mapped_keys_soft = map_key(soft_states, thdim, ahdim)

                _, _, _, snt_cost = decoder.forward(
                    tgt_seq, target_inputs, tgt_mask,
                    [mapped_keys_src, mapped_keys_soft], [src_mask, soft_mask],
                    [annotation, soft_states], initial_state, keep_prob)

            ce = snt_cost
            true_cost = T.mean(ce)
            lamb = theano.shared(numpy.asarray(option['lambda'], dtype),
                                 'lambda')
            cost = lamb * soft_cost + (1 - lamb) * true_cost

        # import utils.ttensor
        # print('true_cost %d:' % len(utils.ttensor.find_inputs_and_params(true_cost)[0]))
        # for xxx in utils.ttensor.find_inputs_and_params(true_cost)[0]:
        #     print('\t', xxx)
        # print('soft_cost %d:' % len(utils.ttensor.find_inputs_and_params(soft_cost)[0]))
        # for xxx in utils.ttensor.find_inputs_and_params(soft_cost)[0]:
        #     print('\t', xxx)
        # print('tot_cost: %d' % len(utils.ttensor.find_inputs_and_params(cost)[0]))
        # for xxx in utils.ttensor.find_inputs_and_params(cost)[0]:
        #     print('\t', xxx)
        # print('snt_cost: %d' % len(utils.ttensor.find_inputs_and_params(snt_cost)[0]))
        # for xxx in utils.ttensor.find_inputs_and_params(snt_cost)[0]:
        #     print('\t', xxx)

        training_inputs = [src_seq, src_mask, tgt_seq, tgt_mask, byseq]
        training_outputs = [cost, soft_cost, true_cost]

        # get_snt_cost = theano.function(training_inputs[:4], snt_cost)
        get_snt_cost = None

        # decoding graph
        with ops.variable_scope(scope, reuse=True):
            prev_words = T.ivector("prev_words")

            # disable dropout
            source_inputs = nn.embedding_lookup(source_embedding, src_seq)
            source_inputs = source_inputs + source_bias
            target_inputs = nn.embedding_lookup(target_embedding, tgt_seq)
            target_inputs = target_inputs + target_bias

            states, r_states = encoder.forward(source_inputs, src_mask)
            annotation = T.concatenate([states, r_states], 2)

            with ops.variable_scope('soft_decoder'):
                initial_state = nn.feedforward(states[-1], [shdim, thdim],
                                               True,
                                               scope='initial',
                                               activation=T.tanh)
                mapped_keys = map_key(annotation, 2 * shdim, ahdim)
                soft_states, soft_contexts, soft_probs, soft_mask = soft_decoder.infer(
                    mapped_keys, src_mask, annotation, initial_state,
                    target_embedding, target_bias, 1.0)

            # decoder
            final_state = r_states[0]
            with ops.variable_scope(decoder_scope):
                initial_state = nn.feedforward(final_state, [shdim, thdim],
                                               True,
                                               scope="initial",
                                               activation=T.tanh)
                # keys for query
                with ops.variable_scope('map-key-src'):
                    mapped_keys_src = map_key(annotation, 2 * shdim, ahdim)
                with ops.variable_scope('map-key-soft'):
                    mapped_keys_soft = map_key(soft_states, thdim, ahdim)

            prev_inputs = nn.embedding_lookup(target_embedding, prev_words)
            prev_inputs = prev_inputs + target_bias

            cond = T.neq(prev_words, 0)
            # zeros out embedding if y is 0, which indicates <s>
            prev_inputs = prev_inputs * cond[:, None]

            with ops.variable_scope(decoder_scope):
                mask = T.ones_like(prev_words, dtype=dtype)
                next_state, context = decoder.step(
                    prev_inputs, mask, initial_state, *[
                        mapped_keys_src, mapped_keys_soft, annotation,
                        soft_states, src_mask, soft_mask
                    ])
                probs = decoder.prediction(prev_inputs, next_state, context)

        # encoding
        encoding_inputs = [src_seq, src_mask]
        encoding_outputs = [
            initial_state, annotation, soft_states, mapped_keys_src,
            mapped_keys_soft, soft_mask
        ]
        encode = theano.function(encoding_inputs, encoding_outputs)

        if option["decoder"] == "GruSimple":
            raise ValueError()
            prediction_inputs = [
                prev_words, initial_state, annotation, mapped_keys, src_mask
            ]
            prediction_outputs = [probs, context]
            predict = theano.function(prediction_inputs, prediction_outputs)

            generation_inputs = [prev_words, initial_state, context]
            generation_outputs = next_state
            generate = theano.function(generation_inputs, generation_outputs)

            self.predict = predict
            self.generate = generate
        elif option["decoder"] == "GruCond":
            prediction_inputs = [
                prev_words, initial_state, annotation, mapped_keys_src,
                src_mask, soft_states, mapped_keys_soft, soft_mask
            ]
            prediction_outputs = [probs, next_state]
            predict = theano.function(prediction_inputs, prediction_outputs)
            self.predict = predict

        self.cost = cost
        self.inputs = training_inputs
        self.outputs = training_outputs
        self.updates = []
        self.align = None
        self.sample = None
        self.encode = encode

        self.get_snt_cost = get_snt_cost
        self.option = option
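
        # Hedged usage sketch (variable names are illustrative, not from the
        # snippet): one greedy decoding step with the graphs compiled above,
        # assuming option["decoder"] == "GruCond".
        #   init_state, annot, soft_s, keys_src, keys_soft, soft_m = \
        #       model.encode(src_seq_val, src_mask_val)
        #   probs, next_state = model.predict(prev_word_ids, init_state, annot,
        #                                     keys_src, src_mask_val, soft_s,
        #                                     keys_soft, soft_m)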
Ejemplo n.º 60
0
    def theano_expr(self, targets, mode='stack', sparse=False):
        """
        Return the one-hot transformation as a symbolic expression.
        If labels appear multiple times, their value in the one-hot
        vector is incremented.

        Parameters
        ----------
        targets : tensor_like, 1- or 2-dimensional, integer dtype
            A symbolic tensor representing labels as integers
            between 0 and `max_labels` - 1, `max_labels` supplied
            at formatter construction.
        mode : string
            The way in which to convert the labels to arrays. Takes
            three different options:

              - "concatenate" : concatenates the one-hot vectors from
                multiple labels
              - "stack" : returns a matrix where each row is the
                one-hot vector of a label
              - "merge" : merges the one-hot vectors together to
                form a vector where the elements are
                the result of an indicator function
                NB: since the merged vector is an indicator function,
                the result is unchanged if a label appears more than
                once in the input.
        sparse : bool
            If true, the return value is a sparse matrix. Note that
            if sparse is True, then mode cannot be 'stack' because
            sparse matrices need to be 2D

        Returns
        -------
        one_hot : TensorVariable, 1, 2 or 3-dimensional, sparse or dense
            A symbolic tensor representing a one-hot encoding of the
            supplied labels.
        """
        if mode not in ('concatenate', 'stack', 'merge'):
            raise ValueError("%s got bad mode argument '%s'" %
                             (self.__class__.__name__, str(mode)))
        elif mode == 'stack' and sparse:
            raise ValueError("Sparse matrices need to be 2D, hence they "
                             "cannot be stacked")
        squeeze_required = False
        if targets.ndim != 2:
            if targets.ndim == 1:
                squeeze_required = True
                targets = targets.dimshuffle('x', 0)
            else:
                raise ValueError("targets tensor must be 1 or 2-dimensional")
        if 'int' not in str(targets.dtype):
            raise TypeError("need an integer tensor for targets")
        if sparse:
            if mode == 'concatenate':
                one_hot = theano.sparse.CSR(
                    tensor.ones_like(targets, dtype=self._dtype).flatten(),
                    (targets.flatten() +
                     tensor.arange(targets.size) * self._max_labels) %
                    (self._max_labels * targets.shape[1]),
                    tensor.arange(targets.shape[0] + 1) * targets.shape[1],
                    tensor.stack(targets.shape[0],
                                 self._max_labels * targets.shape[1]))
            else:
                one_hot = theano.sparse.CSR(
                    tensor.ones_like(targets, dtype=self._dtype).flatten(),
                    targets.flatten(),
                    tensor.arange(targets.shape[0] + 1) * targets.shape[1],
                    tensor.stack(targets.shape[0], self._max_labels))
        else:
            if mode == 'concatenate':
                one_hot = tensor.zeros(
                    (targets.shape[0] * targets.shape[1], self._max_labels))
                one_hot = tensor.set_subtensor(
                    one_hot[tensor.arange(targets.size),
                            targets.flatten()], 1)
                one_hot = one_hot.reshape(
                    (targets.shape[0], targets.shape[1] * self._max_labels))
            elif mode == 'merge':
                one_hot = tensor.zeros((targets.shape[0], self._max_labels))
                one_hot = tensor.set_subtensor(
                    one_hot[tensor.arange(targets.size) % targets.shape[0],
                            targets.T.flatten()], 1)
            else:
                one_hot = tensor.zeros(
                    (targets.shape[0], targets.shape[1], self._max_labels))
                one_hot = tensor.set_subtensor(
                    one_hot[tensor.arange(targets.shape[0]).reshape(
                        (targets.shape[0], 1)),
                            tensor.arange(targets.shape[1]), targets], 1)
            if squeeze_required:
                if one_hot.ndim == 2:
                    one_hot = one_hot.reshape((one_hot.shape[1], ))
                if one_hot.ndim == 3:
                    one_hot = one_hot.reshape(
                        (one_hot.shape[1], one_hot.shape[2]))
        return one_hot
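
    # Illustrative usage sketch (the formatter class and its constructor are
    # assumptions, not shown in this snippet):
    #   fmt = OneHotFormatter(max_labels=4)
    #   targets = tensor.ivector('targets')
    #   f = theano.function([targets], fmt.theano_expr(targets, mode='stack'))
    #   f(numpy.asarray([0, 3, 1], dtype='int32'))
    #   -> [[1, 0, 0, 0], [0, 0, 0, 1], [0, 1, 0, 0]]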