def get_cost_updates(self, x, W, W_prime, b, b_prime, corruption_level, learning_rate, l2reg=0., l1reg=0.):
        """ This function computes the cost and the updates for one trainng
        step of the dA """
        self.x = x
        self.W = W
        self.W_prime = W_prime
        self.b = b
        self.b_prime = b_prime
        self.params = [self.W, self.W_prime, self.b, self.b_prime]
        if corruption_level is None:
            tilde_x = self.x
        else:
            tilde_x = self.get_corrupted_input(self.x, corruption_level)
        y       = self.get_hidden_values( tilde_x)
        z       = self.get_reconstructed_input(y)
        # note : we sum over the size of a datapoint; if we are using minibatches,
        #        the summed cross-entropy will be a vector, with one entry per example in the minibatch
        
        XE = self.x * T.log(z) + (1 - self.x) *  T.log(1-z)
        cost = -T.mean(T.sum(XE, axis=1),axis=0)
        
        if l2reg != 0.:
            cost += l2reg * (T.mean(T.sum(self.W*self.W,1),0) + T.mean(T.sum(self.W_prime*self.W_prime,1),0))
        if l1reg != 0.:
            # L1 penalty on the hidden activations
            cost += l1reg * T.mean(T.sum(T.abs_(y), 1), 0)
        # compute the gradients of the cost of the `dA` with respect
        # to its parameters 
        gparams = T.grad(cost, self.params)
#        # generate the list of updates
#        updates = {}
#        for param, gparam in zip(self.params, gparams):
#            updates[param] = param -  learning_rate*gparam
        updates = [-learning_rate*gparam for gparam in gparams]

        return (cost, updates)
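A minimal sketch (the instance `da`, the symbolic input `x`, the shared parameters and the minibatch plumbing `train_set_x`/`batch_size`/`index` are all assumptions, not part of the original) of compiling this into a training step; note that here `updates` holds raw deltas of the form -learning_rate * gradient rather than (param, new_value) pairs:

import theano
import theano.tensor as T

index = T.lscalar('index')                     # minibatch index
cost, deltas = da.get_cost_updates(x, W, W_prime, b, b_prime,
                                   corruption_level=0.3, learning_rate=0.1)
# each delta is -learning_rate * gradient, so adding it performs one SGD step
sgd_updates = [(p, p + d) for p, d in zip(da.params, deltas)]
train_da = theano.function(
    [index], cost, updates=sgd_updates,
    givens={x: train_set_x[index * batch_size:(index + 1) * batch_size]})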
Example #2
    def sample(self, Y):
        """ Given samples from the upper layer Y, sample values from X
            and return then together with their log probability.

        Parameters
        ----------
        Y:      T.tensor
            samples from the upper layer

        Returns
        -------
        X:      T.tensor
            samples from the lower layer
        log_p:  T.tensor
            log-posterior for the samples returned in X
        """
        n_X, = self.get_hyper_params(['n_X'])
        W, b = self.get_model_params(['W', 'b'])

        n_samples = Y.shape[0]

        # sample X given Y
        prob_X = self.sigmoid(T.dot(Y, W) + b)
        U = theano_rng.uniform((n_samples, n_X), nstreams=512)
        X = T.cast(U <= prob_X, dtype=floatX)

        log_prob = X*T.log(prob_X) + (1-X)*T.log(1-prob_X)
        log_prob = log_prob.sum(axis=1)

        return X, log_prob
Example #3
    def get_cost_updates(self, corruption_level, learning_rate):
        """ This function computes the cost and the updates for one trainng
        step of the dA """

        tilde_x = self.get_corrupted_input(self.x, corruption_level)
        y = self.get_hidden_values(tilde_x)
        z = self.get_reconstructed_input(y)
        # note : we sum over the size of a datapoint; if we are using
        #        minibatches, L will be a vector, with one entry per
        #        example in minibatch
        L = - T.sum(self.x * T.log(z) + (1 - self.x) * T.log(1 - z), axis=1)
        # note : L is now a vector, where each element is the
        #        cross-entropy cost of the reconstruction of the
        #        corresponding example of the minibatch. We need to
        #        compute the average of all these to get the cost of
        #        the minibatch
        cost = T.mean(L)

        # compute the gradients of the cost of the `dA` with respect
        # to its parameters
        gparams = T.grad(cost, self.params)
        # generate the list of updates
        updates = [
            (param, param - learning_rate * gparam)
            for param, gparam in zip(self.params, gparams)
        ]

        return (cost, updates)
 def cross_entropy(self, y):
     
     #return (-(y * T.log(self.y) + (1.0 - y) * T.log(1.0 - self.y))).mean()
     #return T.nnet.binary_crossentropy(self.y, y).mean()
      # clip predictions away from 0 and 1 so the logs stay finite
      y_used = T.clip(self.y, 0.0000001, 0.999999999)
      return T.mean(-y * T.log(y_used) - (1 - y) * T.log(1 - y_used))
Example #5
    def get_reconstruction_cost(self, updates, pre_sigmoid_nv):
        """ Approximation to the recontruction error

        Note that this function requires the pre-sigmoid actiavtion as input. To
        undertstand why this is so you need to understand a bit about how Theano works.
        Whenever you compile a Theano function, the computational graph that you pass as input
        gets optimized for speed and stability. This is done by changing several parts of
        the subgraphs with others. One such optimization expresses terms of softplus. We need this
        optimizations for the cross-entropy since sigmoid of numbers larger than 30. (or even less
        then that) return to 1. and numbers of smaller than -30, turn to 0 which ini terms will force
        theano to compute log(0) and thereforce we will get either -inf or NaN as cost. If the value is
        expressed in terms of softplus we do not get this undersirable behaviour. This optimiation usually
        works fine, but here we have a special case. The sigmoid is applied inside the scan op, while
        the log is outisde. Therefore Theano will only see log(scan(...)) instead of log(sigmoid(..))
        and will not apply the wanted optimization. We can not go and replace the sigmoid in scan
        with something else alse, because this only needs to be done on the last step. Therefore the
        easiest adn more efficient way is to get also teh pre-sigmoid activation as an output of
        scan, and apply both the log and sigmoid outside scan sunch that Theano can catch and optimize
        the expression.
        """

        cross_entropy = T.mean(
            T.sum(
                self.input * T.log(T.nnet.sigmoid(pre_sigmoid_nv)) +
                ( 1 - self.input) * T.log(1 - T.nnet.sigmoid(pre_sigmoid_nv)),
                axis=1
            )
        )

        return  cross_entropy
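To make the rewrite described in the docstring concrete: log(sigmoid(a)) = -softplus(-a) and log(1 - sigmoid(a)) = -softplus(a). A small helper (an addition for illustration, not part of the original class) computes the same quantity directly in the stable form:

import theano.tensor as T

def cross_entropy_from_pre_sigmoid(v, pre_sigmoid_nv):
    # same value as the expression above, but written via softplus so that
    # very large or very small pre-sigmoid activations never produce log(0)
    return T.mean(T.sum(-v * T.nnet.softplus(-pre_sigmoid_nv)
                        - (1 - v) * T.nnet.softplus(pre_sigmoid_nv),
                        axis=1))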
Example #6
 def _build_marginal_likelihood_logp(self, y, X, Xu, sigma):
     sigma2 = tt.square(sigma)
     Kuu = self.cov_func(Xu)
     Kuf = self.cov_func(Xu, X)
     Luu = cholesky(stabilize(Kuu))
     A = solve_lower(Luu, Kuf)
     Qffd = tt.sum(A * A, 0)
     if self.approx == "FITC":
         Kffd = self.cov_func(X, diag=True)
         Lamd = tt.clip(Kffd - Qffd, 0.0, np.inf) + sigma2
         trace = 0.0
     elif self.approx == "VFE":
         Lamd = tt.ones_like(Qffd) * sigma2
         trace = ((1.0 / (2.0 * sigma2)) *
                  (tt.sum(self.cov_func(X, diag=True)) -
                   tt.sum(tt.sum(A * A, 0))))
     else:  # DTC
         Lamd = tt.ones_like(Qffd) * sigma2
         trace = 0.0
     A_l = A / Lamd
     L_B = cholesky(tt.eye(Xu.shape[0]) + tt.dot(A_l, tt.transpose(A)))
     r = y - self.mean_func(X)
     r_l = r / Lamd
     c = solve_lower(L_B, tt.dot(A, r_l))
     constant = 0.5 * X.shape[0] * tt.log(2.0 * np.pi)
     logdet = 0.5 * tt.sum(tt.log(Lamd)) + tt.sum(tt.log(tt.diag(L_B)))
     quadratic = 0.5 * (tt.dot(r, r_l) - tt.dot(c, c))
     return -1.0 * (constant + logdet + quadratic + trace)
Example #7
File: AE.py Project: felidadae/dnn
    def compileFunctions(self, x_image_global, examples, ib, B, K, corrupt):
        if x_image_global is None:
            x_image_global = self.x

        if corrupt == 0.0:
            self.x_c = self.x
        else:
            self.x_c = self.theano_rng.binomial(
                size=self.x.shape, n=1, p=1-corrupt,
                dtype=theano.config.floatX) * self.x

        self.h = self.g(T.dot(self.x_c, self.W_hl) + self.b_hl)
        self.x_r = self.o(T.dot(self.h, self.W_ol) + self.b_ol)
        self.params = [self.W_hl, self.b_hl, self.b_ol]
        self.cost = \
            (- T.sum(
                self.x * T.log(self.x_r) + (1 - self.x) * T.log(1 - self.x_r),
                axis=(0,1)))

        gparams = T.grad(self.cost, self.params)
        updates = [
            (param, param - K * gparam)
            for param, gparam in zip(self.params, gparams)
        ]

        fun_train = theano.function(
            inputs=[ib],
            outputs=(self.cost, self.x_r, self.x_c),
            updates=updates,
            givens={
                x_image_global: examples[ib*B: (ib+1)*B]
            }
        )

        return fun_train
Example #8
def binomial_lpdf(node, x, kw):
    random_state, size, n, p = node.inputs

    # for the n > 1 the "choose" operation is required
    # TODO assert n == 1

    return tensor.switch(tensor.eq(x, 1.0), tensor.log(p), tensor.log(1.0 - p))
Example #9
def _elbo_t(logp, uw, inarray, n_mcsamples, random_seed):
    """Create Theano tensor of approximate ELBO by Monte Carlo sampling.
    """
    l = (uw.size / 2)
    l_int = l.astype('int64')
    u = uw[:l_int]
    w = uw[l_int:]

    # Callable tensor
    def logp_(input):
        return theano.clone(logp, {inarray: input}, strict=False)

    # Naive Monte-Carlo
    if random_seed is None:
        r = MRG_RandomStreams(gen_random_state())
    else:
        r = MRG_RandomStreams(seed=random_seed)

    if n_mcsamples == 1:
        n = r.normal(size=inarray.tag.test_value.shape)
        q = n * tt.exp(w) + u
        elbo = logp_(q) + tt.sum(w) + 0.5 * l * (1 + tt.log(2.0 * np.pi))
    else:
        n = r.normal(size=(n_mcsamples, u.tag.test_value.shape[0]))
        qs = n * tt.exp(w) + u
        logps, _ = theano.scan(fn=lambda q: logp_(q),
                               outputs_info=None,
                               sequences=[qs])
        elbo = tt.mean(logps) + tt.sum(w) + 0.5 * l * (1 + tt.log(2.0 * np.pi))

    return elbo
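The closing term in both branches is the entropy of the fully factorized Gaussian posterior with means u and log-standard-deviations w, so (with ℓ = uw.size / 2, the dimensionality of the posterior) the estimator is

\[
\widehat{\mathrm{ELBO}} = \frac{1}{S}\sum_{s=1}^{S} \log p(q_s)
  + \sum_{i} w_i + \frac{\ell}{2}\bigl(1 + \log 2\pi\bigr),
\qquad q_s = u + e^{w} \odot n_s,\; n_s \sim \mathcal{N}(0, I).
\]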
Example #10
 def logp(self, value):
     mu = self.mu
     tau = self.tau
     return bound(-0.5 * tau * (T.log(value) - mu)**2
                  + 0.5 * T.log(tau/(2. * np.pi))
                  - T.log(value),
                  tau > 0)
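The bounded expression is the log-normal log-density with precision τ (i.e. σ² = 1/τ):

\[
\log p(x \mid \mu, \tau) = -\frac{\tau}{2}\bigl(\log x - \mu\bigr)^2
  + \frac{1}{2}\log\frac{\tau}{2\pi} - \log x, \qquad \tau > 0 .
\]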
Example #11
 def logp(self, value):
     alpha = self.alpha
     beta = self.beta
     return bound(T.log(alpha) - T.log(beta)
                  + (alpha - 1) * T.log(value/beta)
                  - (value/beta)**alpha,
                  value >= 0, alpha > 0, beta > 0)
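This matches the Weibull log-density with shape α and scale β:

\[
\log p(x \mid \alpha, \beta) = \log\alpha - \log\beta
  + (\alpha - 1)\log\frac{x}{\beta} - \left(\frac{x}{\beta}\right)^{\alpha},
\qquad x \ge 0,\ \alpha > 0,\ \beta > 0 .
\]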
Example #12
    def grad_init(self):
        mask_ = self.mask.flatten()
        rewards_ = self.rewards.flatten()
        actions_ = self.actions.reshape([self.actions.shape[0]*self.actions.shape[1],-1])

        #self.mov_std = theano.shared(numpy.float32(1.), 'std')

        pp = self.params.values()
        mean_rewards = (mask_ * rewards_).sum(-1, keepdims=True) / tensor.maximum(1., mask_.sum(-1, keepdims=True))
        centered_rewards = rewards_ - self.vapprox.v[:,0] - mean_rewards
        mean2_rewards = (mask_ * (rewards_ ** 2)).sum(-1, keepdims=True) / tensor.maximum(1., mask_.sum(-1, keepdims=True))
        var_rewards = mean2_rewards - (mean_rewards ** 2)
        scaled_rewards = centered_rewards  / tensor.maximum(1., tensor.sqrt(tensor.maximum(0., var_rewards)))
        #scaled_rewards = centered_rewards

        logprob = 0.
        reg = 0.
        for oi in xrange(self.n_out):
            labs = actions_[:,oi].flatten()
            labs_idx = tensor.arange(labs.shape[0]) * self.out_dim + labs
            logprob = logprob + (mask_ * tensor.log(self.pi[oi].flatten()+1e-6)[labs_idx])
            reg = reg - (self.pi[oi] * tensor.log(self.pi[oi]+1e-6)).sum(-1).sum(0)

        self.cost = -tensor.mean(scaled_rewards * logprob + self.reg_c * reg)
        self.grads = tensor.grad(self.cost, wrt=pp)
    def get_cost(self, p=0, sigma=1):
        # the last layer    
        z = self.sigmoid_layers[-1].output
        L = -T.sum(self.x * T.log(z) + (1 - self.x) * T.log(1 - z), axis=1)
        p_idx = len(self.sigmoid_layers)/2 - 1 #penalty layer, the middle layer        
        if p == 0:
            cost = T.mean(L)
#             cost = T.mean(T.sqrt(T.mean(self.errors, axis=1))) #Log Spectral Distance(LSD)
        elif (p != 0) and (sigma == 0):# for square penalty
            square_cost = self.get_square_cost(self.sigmoid_layers[p_idx].output, p)
            cost = T.mean(L) + T.mean(square_cost)
        elif(p != 0) and (sigma != 0):# for Gaussian penalty
            gaussian_cost = self.get_gaussian_cost(self.sigmoid_layers[p_idx].output, p, sigma)
            cost = T.mean(L) + T.mean(gaussian_cost)
#         elif(p == -1) and (sigma == 0):#binary
#             code_val = self.sigmoid_layers[p_idx].output
#             binary_val = code_val>=0.5
#             self.sigmoid_layers[p_idx+1].input = binary_val
#             z = self.sigmoid_layers[-1].output
#             L = -T.sum(self.x * T.log(z) + (1 - self.x) * T.log(1 - z), axis=1)
#             cost = T.mean(L)
#         elif(p == -1) and (sigma != 0):#add gaussian noise
#             gaussian_data = self.theano_rng.normal(size=self.sigmoid_layers[p_idx-1].output.shape, std=sigma,
#                                                    dtype=theano.config.floatX)
#             self.sigmoid_layers[p_idx].input = self.sigmoid_layers[p_idx-1].output + gaussian_data
#             z = self.sigmoid_layers[-1].output
#             L = -T.sum(self.x * T.log(z) + (1 - self.x) * T.log(1 - z), axis=1)
#             cost = T.mean(L)
        else:
            cost = T.mean(L)
        return cost
Example #14
def unet_crossentropy_loss_sampled(y_true, y_pred):
    print 'unet_crossentropy_loss_sampled'
    epsilon = 1.0e-4
    y_pred_clipped = T.flatten(T.clip(y_pred, epsilon, 1.0-epsilon))
    y_true = T.flatten(y_true)
    # this seems to work
    # it is super ugly though and I am sure there is a better way to do it
    # but I am struggling to get theano to cooperate
    # filter the right indices
    indPos = T.nonzero(y_true)[0] # no idea why this is a tuple
    indNeg = T.nonzero(1-y_true)[0]
    # shuffle
    n = indPos.shape[0]
    indPos = indPos[srng.permutation(n=n)]
    n = indNeg.shape[0]
    indNeg = indNeg[srng.permutation(n=n)]
    # take equal number of samples depending on which class has less
    n_samples = T.cast(T.min([T.sum(y_true), T.sum(1-y_true)]), dtype='int64')

    indPos = indPos[:n_samples]
    indNeg = indNeg[:n_samples]
    loss_vector = -T.mean(T.log(y_pred_clipped[indPos])) - T.mean(T.log(1-y_pred_clipped[indNeg]))
    average_loss = T.mean(loss_vector)
    print 'average_loss:', average_loss
    return average_loss
Example #15
def get_model(Ws, bs, dropout=False):
    v = T.matrix('input')
    m = T.matrix('missing')
    q = T.matrix('target')
    k = T.vector('normalization factor')

    # Set all missing/target values to 0.5
    keep_mask = (1-m) * (1-q)
    h = keep_mask * (v * 2 - 1) # Convert to +1, -1
    
    # Normalize layer 0
    h *= k.dimshuffle(0, 'x')

    for l in xrange(len(Ws)):
        h = T.dot(h, Ws[l]) + bs[l]

        if l < len(Ws) - 1:
            h = h * (h > 0) # relu
            if dropout:
                mask = srng.binomial(n=1, p=0.5, size=h.shape)
                h = h * mask * 2

    output = sigmoid(h)
    LL = v * T.log(output) + (1 - v) * T.log(1 - output)
    # loss = -(q * LL).sum() / q.sum()
    loss = -((1 - m) * LL).sum() / (1 - m).sum()

    return v, m, q, k, output, loss
Example #16
 def expr(self, model, data):
     
     v = data
     mid = model.get_enc(v)
     rou_mid = mid.mean(axis=0)
     cs294_sparse = (self.rou * T.log(self.rou / rou_mid) + (1 - self.rou) * T.log((1 - self.rou) / (1 - rou_mid))).sum()
     return cs294_sparse
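The returned quantity is the usual sparsity penalty from the CS294 sparse-autoencoder notes: the KL divergence between the target activation ρ (self.rou) and the mean hidden activation ρ̂_j of unit j over the batch, summed over hidden units:

\[
\sum_j \left[ \rho \log\frac{\rho}{\hat{\rho}_j}
  + (1 - \rho)\log\frac{1 - \rho}{1 - \hat{\rho}_j} \right].
\]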
Example #17
def GMM(y, mu, sig, coeff):
    """
    Gaussian mixture model negative log-likelihood
    Parameters
    ----------
    y     : TensorVariable
    mu    : FullyConnected (Linear)
    sig   : FullyConnected (Softplus)
    coeff : FullyConnected (Softmax)
    """

    n_dim = y.ndim
    shape_y = y.shape
    y = y.reshape((-1, shape_y[-1]))
    y = y.dimshuffle(0, 1, "x")

    mu = mu.reshape((-1, mu.shape[-1] / coeff.shape[-1], coeff.shape[-1]))
    sig = sig.reshape((-1, sig.shape[-1] / coeff.shape[-1], coeff.shape[-1]))
    coeff = coeff.reshape((-1, coeff.shape[-1]))

    inner = -0.5 * T.sum(T.sqr(y - mu) / sig ** 2 + 2 * T.log(sig) + T.log(2 * np.pi), axis=-2)

    nll = -logsumexp(T.log(coeff) + inner, axis=-1)

    # Adjust dimension
    new_dim = T.set_subtensor(shape_y[-1], 1)

    nll = nll.reshape(new_dim, ndim=n_dim)
    nll = nll.flatten(n_dim - 1)

    return nll
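Written out, with mixture weights c_k (coeff) and one diagonal Gaussian per component, the value computed for each target vector y is

\[
\mathrm{NLL}(y) = -\log \sum_{k} c_k \prod_{d}
  \frac{1}{\sqrt{2\pi}\,\sigma_{kd}}
  \exp\!\left(-\frac{(y_d - \mu_{kd})^2}{2\sigma_{kd}^2}\right),
\]

with the per-dimension Gaussian terms and the sum over components both handled in log space (via logsumexp) for numerical stability.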
Example #18
    def lp_norm(self, n, k, r, c, z):
        '''
        Lp pooling: Lp = ( 1/n * sum(|x_i|^p, 1..n))^(1/p) where p = 1 + ln(1+e^P)
        :param n: example (batch) index
        :param k: feature-map index
        :param r: row index of the pooled output
        :param c: column index of the pooled output
        :param z: output tensor into which the pooled value is written
        :return: z with z[n, k, r, c] set to the Lp norm of the pooling window
        '''
        ds0, ds1 = self.pool_size
        st0, st1 = self.stride
        pad_h = self.pad[0]
        pad_w = self.pad[1]

        row_st = r * st0
        row_end = T.minimum(row_st + ds0, self.img_rows)
        row_st = T.maximum(row_st, self.pad[0])
        row_end = T.minimum(row_end, self.x_m2d + pad_h)

        col_st = c * st1
        col_end = T.minimum(col_st + ds1, self.img_cols)
        col_st = T.maximum(col_st, self.pad[1])
        col_end = T.minimum(col_end, self.x_m1d + pad_w)

        Lp = T.pow(
                T.mean(T.pow(
                        T.abs_(T.flatten(self.y[n, k, row_st:row_end, col_st:col_end], 1)),
                        1 + T.log(1 + T.exp(self.P))
                )),
                1 / (1 + T.log(1 + T.exp(self.P)))
        )

        return T.set_subtensor(z[n, k, r, c], Lp)
Example #19
def sequence_log_likelihood(y, y_hat, y_mask, y_hat_mask, blank_symbol, log_scale=True):
    """
    Based on code from Shawn Tan.
    Credits to Kyle Kastner as well.

    This function computes the CTC log likelihood for a sequence that has
    been augmented with blank labels.


    """
    y_hat_mask_len = tensor.sum(y_hat_mask, axis=0, dtype="int32")
    y_mask_len = tensor.sum(y_mask, axis=0, dtype="int32")

    if log_scale:
        log_probabs = _log_path_probabs(y, T.log(y_hat), y_mask, y_hat_mask, blank_symbol)
        batch_size = log_probabs.shape[1]

        # Add the probabilities of the final time steps to get the total
        # sequence likelihood.
        log_labels_probab = _log_add(
            log_probabs[y_hat_mask_len - 1, tensor.arange(batch_size), y_mask_len - 1],
            log_probabs[y_hat_mask_len - 1, tensor.arange(batch_size), y_mask_len - 2],
        )
    else:
        probabilities = _path_probabs(y, y_hat, y_mask, y_hat_mask, blank_symbol)
        batch_size = probabilities.shape[1]
        labels_probab = (
            probabilities[y_hat_mask_len - 1, tensor.arange(batch_size), y_mask_len - 1]
            + probabilities[y_hat_mask_len - 1, tensor.arange(batch_size), y_mask_len - 2]
        )
        log_labels_probab = tensor.log(labels_probab)
    return log_labels_probab
Example #20
 def __init__(self, n_in, n_out, n_h, learning_rate=0.12):
    self.x = T.matrix(dtype=theano.config.floatX)  # @UndefinedVariable
    self.target = T.matrix(dtype=theano.config.floatX)  # @UndefinedVariable
    bound_x = numpy.sqrt(6. / (n_in + n_h))
    bound_h = numpy.sqrt(6. / (n_h + n_h))
    self.params = []
    self.w_x = theano.shared(np.array(np.random.uniform(low=-bound_x, high=bound_x, size=(n_in, n_h)), dtype=theano.config.floatX))  # @UndefinedVariable
    self.params.append(self.w_x)
    self.w_h = theano.shared(np.array(np.random.uniform(low=-bound_h, high=bound_h, size=(n_h, n_h)), dtype=theano.config.floatX))  # @UndefinedVariable
    self.params.append(self.w_h)
    self.b_h = theano.shared(np.array(np.random.uniform(low=-bound_h, high=bound_h, size=(n_h,)), dtype=theano.config.floatX))  # @UndefinedVariable
    self.params.append(self.b_h)
    self.w = theano.shared(np.array(np.random.uniform(low=-bound_h, high=bound_h, size=(n_h, n_out)), dtype=theano.config.floatX))  # @UndefinedVariable
    self.params.append(self.w)
    self.b = theano.shared(np.array(np.random.uniform(low=-bound_h, high=bound_h, size=(n_out,)), dtype=theano.config.floatX))  # @UndefinedVariable
    self.params.append(self.b)
    self.h0 = theano.shared(np.array(np.random.uniform(low=-bound_x, high=bound_x, size=(n_h,)), dtype=theano.config.floatX))  # @UndefinedVariable
    self.params.append(self.h0)
    
    def one_step(x, h1):
        h = T.nnet.sigmoid(T.dot(x, self.w_x) + T.dot(h1, self.w_h) + self.b_h)
        y = T.nnet.sigmoid(T.dot(h, self.w) + self.b)
        return h, y
    
    [hs, ys], _ = theano.scan(fn=one_step, sequences=self.x, outputs_info=[self.h0, None])
    cost = -T.mean(self.target * T.log(ys) + (1 - self.target) * T.log(1 - ys))
    grads = T.grad(cost, self.params)
    
    updates = [(param, param - learning_rate * grad) for param, grad in zip(self.params, grads)]
    
    self.train = theano.function([self.x, self.target], cost, updates=updates)
    
    self.predict = theano.function([self.x], ys)
Example #21
    def get_cost_updates(self, contraction_level, learning_rate):
        """ This function computes the cost and the updates for one trainng
        step of the cA """

        y = self.get_hidden_values(self.x)
        z = self.get_reconstructed_input(y)
        J = self.get_jacobian(y, self.W)
        # note : we sum over the size of a datapoint; if we are using
        #        minibatches, L will be a vector, with one entry per
        #        example in minibatch
        self.L_rec = - T.sum(self.x * T.log(z) +
                             (1 - self.x) * T.log(1 - z),
                             axis=1)

        # Compute the jacobian and average over the number of samples/minibatch
        self.L_jacob = T.sum(J ** 2) / self.n_batchsize

        # note : L is now a vector, where each element is the
        #        cross-entropy cost of the reconstruction of the
        #        corresponding example of the minibatch. We need to
        #        compute the average of all these to get the cost of
        #        the minibatch
        cost = T.mean(self.L_rec) + contraction_level * T.mean(self.L_jacob)

        # compute the gradients of the cost of the `cA` with respect
        # to its parameters
        gparams = T.grad(cost, self.params)
        # generate the list of updates
        updates = []
        for param, gparam in zip(self.params, gparams):
            updates.append((param, param - learning_rate * gparam))

        return (cost, updates)
Example #22
    def test_log1msigm_to_softplus(self):
        x = T.matrix()

        out = T.log(1 - sigmoid(x))
        f = theano.function([x], out, mode=self.m)
        topo = f.maker.fgraph.toposort()
        assert len(topo) == 2
        assert isinstance(topo[0].op.scalar_op,
                          theano.tensor.nnet.sigm.ScalarSoftplus)
        assert isinstance(topo[1].op.scalar_op, theano.scalar.Neg)
        f(numpy.random.rand(54, 11).astype(config.floatX))

        # Same test with a flatten
        out = T.log(1 - T.flatten(sigmoid(x)))
        f = theano.function([x], out, mode=self.m)
        topo = f.maker.fgraph.toposort()
        assert len(topo) == 3
        assert isinstance(topo[0].op, T.Flatten)
        assert isinstance(topo[1].op.scalar_op,
                          theano.tensor.nnet.sigm.ScalarSoftplus)
        assert isinstance(topo[2].op.scalar_op, theano.scalar.Neg)
        f(numpy.random.rand(54, 11).astype(config.floatX))

        # Same test with a reshape
        out = T.log(1 - sigmoid(x).reshape([x.size]))
        f = theano.function([x], out, mode=self.m)
        topo = f.maker.fgraph.toposort()
        #assert len(topo) == 3
        assert any(isinstance(node.op, T.Reshape) for node in topo)
        assert any(isinstance(getattr(node.op, 'scalar_op', None),
                              theano.tensor.nnet.sigm.ScalarSoftplus)
                   for node in topo)
        f(numpy.random.rand(54, 11).astype(config.floatX))
Example #23
    def negative_log_likelihood(self, y):
        """ Return the mean of the negative log-likelihood of the prediction
        of this model under a given target distribution.

        .. math::
            \frac{1}{|\mathcal{D}|} \mathcal{L} (\theta=\{W,b\}, \mathcal{D}) =
            \frac{1}{|\mathcal{D}|} \sum_{i=0}^{|\mathcal{D}|} \log(P(Y=y^{(i)}|x^{(i)}, W,b)) \\
                    \ell (\theta=\{W,b\}, \mathcal{D})

        :type y: theano.tensor.TensorType
        :param y: corresponds to a vector that gives for each example the
                  correct label
        Note: we use the mean instead of the sum so that
              the learning rate is less dependent on the batch size
        """

        # y.shape[0] is (symbolically) the number of rows in y, i.e.,
        # number of examples (call it n) in the minibatch
        # T.arange(y.shape[0]) is a symbolic vector which will contain
        # [0,1,2,... n-1] T.log(self.p_y_given_x) is a matrix of
        # Log-Probabilities (call it LP) with one row per example and
        # one column per class LP[T.arange(y.shape[0]),y] is a vector
        # v containing [LP[0,y[0]], LP[1,y[1]], LP[2,y[2]], ...,
        # LP[n-1,y[n-1]]] and T.mean(LP[T.arange(y.shape[0]),y]) is
        # the mean (across minibatch examples) of the elements in v,
        # i.e., the mean log-likelihood across the minibatch.
        if self.is_binary:
            return -T.mean(T.log(self.p_y_given_x))
        return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y])
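A small concrete illustration (made-up numbers, NumPy instead of symbolic tensors) of the indexing trick described in the comment above:

import numpy as np

# two examples, three classes
LP = np.log(np.array([[0.7, 0.2, 0.1],
                      [0.1, 0.8, 0.1]]))
y = np.array([0, 1])                     # correct class of each example
picked = LP[np.arange(y.shape[0]), y]    # [log 0.7, log 0.8]
mean_nll = -picked.mean()                # mean negative log-likelihood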
Example #24
def simple_RNN(nh):
  Wx = theano.shared(0.2 * numpy.random.uniform(-1.0, 1.0, (1, nh)).astype(theano.config.floatX))
  Wh = theano.shared(0.2 * numpy.random.uniform(-1.0, 1.0, (nh, nh)).astype(theano.config.floatX))
  Wy = theano.shared(0.2 * numpy.random.uniform(-1.0, 1.0, (nh, 1)).astype(theano.config.floatX))
  bh = theano.shared(numpy.zeros(nh, dtype=theano.config.floatX))
  by = theano.shared(numpy.zeros(1, dtype=theano.config.floatX))
  h0 = theano.shared(numpy.zeros(nh, dtype=theano.config.floatX))
  p = [Wx, Wh, Wy, bh, by, h0]

  x = T.matrix()

  def recurrence(x_t, h_tm1):
    h_t = T.tanh(T.dot(x_t, Wx) + T.dot(h_tm1, Wh) + bh)
    s_t = T.dot(h_t, Wy) + by
    return [h_t, s_t]

  ([h, activations], updates) = theano.scan(fn=recurrence, sequences=x, outputs_info=[h0, dict()])

  t = x[0, 0]
  s = activations[-1, 0]
  y = T.nnet.sigmoid(s)
  loss = -t*T.log(y + 1e-14) - (1-t)*T.log((1-y) + 1e-14)
  acc = T.neq(T.round(y), t)
  
  return p, [x], s, [loss, acc], h
Example #25
    def get_cost_updates(self, contraction_level, learning_rate, cost_measure="cross_entropy"):
        """ This function computes the cost and the updates for one trainng
        step of the cA """

        y = self.get_hidden_values(self.x)
        z = self.get_reconstructed_input(y)
        J = self.get_jacobian(y, self.W)

        if cost_measure=="cross_entropy":
            #self.L_rec = - T.sum(self.x * T.log(z) + (1 - self.x) * T.log(1 - z), axis=1)
            self.L_rec = T.mean(- T.sum(self.x * T.log(z) + (1 - self.x) * T.log(1 - z),axis=1))
        elif cost_measure=="euclidean":
            self.L_rec = T.mean(T.sum((self.x-z)**2,axis=1)) 
            
        # Compute the jacobian and average over the number of samples/minibatch
        self.L_jacob = T.mean(T.sum(J ** 2) / self.n_batchsize)
        
        cost = self.L_rec + contraction_level * self.L_jacob

        # compute the gradients of the cost of the `cA` with respect
        # to its parameters
        gparams = T.grad(cost, self.params)
        # generate the list of updates
        updates = []
        for param, gparam in zip(self.params, gparams):
            updates.append((param, param - learning_rate * gparam))

        return (cost, updates)
 def forward_jacobian_log_det(self, x):
     dy_dx, _ = th.scan(lambda x_i: th.grad(self.forward_func(x_i), x_i),
                        sequences=[x.flatten()])
     if self.fudge != 0.:
         return tt.log(dy_dx + self.fudge).sum()
     else:
         return tt.log(dy_dx).sum()
 def forward_jacobian_log_det(self, x):
     y_sum = self.forward_map(x).sum()
     dy_dx = th.grad(y_sum, x)
     if self.fudge != 0.:
         return tt.log(dy_dx + self.fudge).sum()
     else:
         return tt.log(dy_dx).sum()
Example #28
    def get_sparsity_cost(self):

        # update mean activation using exponential moving average
        hack_h = self.h_given_v(self.sp_pos_v)

        # define loss based on value of sp_type
        if self.sp_type == 'kl':
            eps = npy_floatX(1./self.batch_size)
            loss = lambda targ, val: - npy_floatX(targ) * T.log(eps + val) \
                                     - npy_floatX(1-targ) * T.log(1 - val + eps)
        else:
            raise NotImplementedError('Sparsity type %s is not implemented' % self.sp_type)

        cost = T.zeros((), dtype=floatX)

        params = []
        if self.sp_weight['h']: 
            cost += self.sp_weight['h']  * T.sum(loss(self.sp_targ['h'], hack_h.mean(axis=0)))
            params += [self.hbias]

        if self.sp_type in ['kl'] and self.sp_weight['h']:
            params += [self.Wv, self.alpha, self.mu]
            if self.flags['split_norm']:
                params += [self.scalar_norms]

        return costmod.Cost(cost, params)
 def forward_jacobian_log_det(self, x):
     if x.ndim == 1:
         return tt.log(tt.abs_(self.diag_weights)).sum()
     elif x.ndim == 2:
         return x.shape[0] * tt.log(tt.abs_(self.diag_weights)).sum()
     else:
         raise ValueError('x must be one or two dimensional.')
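This follows from the map being y = diag(d) · x: the Jacobian is the diagonal matrix itself, so

\[
\log\lvert\det J\rvert = \sum_i \log\lvert d_i\rvert ,
\]

and for a batch of shape (n, dim) the same term appears once per row, hence the factor x.shape[0].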
Example #30
    def get_reconstruction_cost(self, updates, pre_sigmoid_nv):
        cross_entropy = T.mean(
                T.sum(self.input * T.log(T.nnet.sigmoid(pre_sigmoid_nv)) +
                (1 - self.input) * T.log(1 - T.nnet.sigmoid(pre_sigmoid_nv)),
                      axis=1))

        return cross_entropy
Example #31
 def hinge_loss(self, y):
     return -T.mean(T.log(self.p_y_given_x)[:,y]) # TODO
Example #32
 def forward(self, x):
     return tt.switch(x < 1, tt.log(x), x - 1.0)
Example #33
def log(x):
    return T.log(x)
Example #34
 def logp(self, value):
     w = self.w
     
     return bound(logsumexp(tt.log(w) + self._comp_logp(value), axis=-1).sum(),
                  w >= 0, w <= 1, tt.allclose(w.sum(axis=-1), 1))
Example #35
    def __init__(self, dim, n_entities, batch_size=None, validation_samples=2):

        self.__dict__.update(locals())
        del self.self

        theano_rng = RandomStreams(numpy.random.randint(2**30))

        #Start by defining the graph

        ##Parameter setup
        self.emb = theano.shared((numpy.random.uniform(
            -1.0, 1.0,
            (self.n_entities, self.dim))).astype(theano.config.floatX))
        self.emb.tag.test_value = (numpy.random.uniform(
            -1.0, 1.0,
            (self.n_entities, self.dim))).astype(theano.config.floatX)

        self.a = theano.shared(numpy.asarray(1.0).astype(theano.config.floatX))
        self.b = theano.shared(numpy.asarray(0.0).astype(theano.config.floatX))

        self.params = [self.emb, self.a, self.b]

        ### Input setup!
        self.x1_idxs = T.ivector()
        self.x2_idxs = T.ivector()
        self.x1_idxs.tag.test_value = numpy.asarray([0, 1], dtype=numpy.int32)
        self.x2_idxs.tag.test_value = numpy.asarray([1, 2], dtype=numpy.int32)

        #generate negative samples
        choice = theano_rng.binomial(size=self.x1_idxs.shape)
        alternative = theano_rng.random_integers(size=self.x1_idxs.shape,
                                                 low=0,
                                                 high=n_entities - 1)
        self.x1_idxs_negative = T.switch(choice, self.x1_idxs, alternative)
        self.x2_idxs_negative = T.switch(choice, alternative, self.x2_idxs)

        ### Define graph from input to predictive loss
        def get_embed(index_tensor):
            return sigmoid(self.emb[index_tensor].reshape(
                (index_tensor.shape[0], self.dim)))

        x1_emb = get_embed(self.x1_idxs)
        x2_emb = get_embed(self.x2_idxs)
        x1neg_emb = get_embed(self.x1_idxs_negative)
        x2neg_emb = get_embed(self.x2_idxs_negative)

        def get_prob1(embed_tensor1, embed_tensor2):
            return sigmoid(
                self.a * T.mean(embed_tensor1 * embed_tensor2 +
                                (1 - embed_tensor1) * (1 - embed_tensor2),
                                axis=1) +
                self.b)  # probability of a link, 0 to 1.

        self.loss = T.mean(-T.log(get_prob1(x1_emb, x2_emb)) -
                           T.log(1 - get_prob1(x1neg_emb, x2neg_emb)))

        ###Define graph from input to sampled/validated loss
        randomizationA = theano_rng.uniform(size=(self.validation_samples,
                                                  self.dim))
        randomizationB = theano_rng.uniform(size=(self.validation_samples,
                                                  self.dim))
Example #36
def softplus_f(v):
    return T.log(1 + T.exp(v))
Example #37
 def hinge_loss_sum(self, y):
     return -T.sum(T.log(self.p_y_given_x)[:,y])
Example #38
def tlogit(x):
    return T.log(x / (np.float32(1) - x))
b2_init = np.zeros(output_size)

thX = T.matrix("X")
thT = T.matrix("T")
W1 = theano.shared(W1_init, "W1")
W2 = theano.shared(W2_init, "W2")
b1 = theano.shared(b1_init, "b1")
b2 = theano.shared(b2_init, "b2")

thZ = T.nnet.relu(thX.dot(W1) + b1)

thY = T.nnet.softmax(thZ.dot(W2) + b2)

prediction = T.argmax(thY, axis=1)

cost = -(thT * T.log(thY)).sum() + reg * ((W1 * W1).sum() + (b1 * b1).sum() +
                                          (W2 * W2).sum() + (b2 * b2).sum())

update_W1 = W1 - lr * T.grad(cost, W1)
update_b1 = b1 - lr * T.grad(cost, b1)
update_W2 = W2 - lr * T.grad(cost, W2)
update_b2 = b2 - lr * T.grad(cost, b2)

train = theano.function([thX, thT],
                        updates=[(W1, update_W1), (W2, update_W2),
                                 (b1, update_b1), (b2, update_b2)])

get_prediction = theano.function(inputs=[thX, thT], outputs=[cost, prediction])

costs = []
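A sketch of how the compiled train and get_prediction functions might be driven; the arrays Xtrain/Ttrain/Xtest/Ttest and the constants max_iter, n_batches, batch_sz, print_period are assumptions, not part of the original snippet:

for i in range(max_iter):
    for j in range(n_batches):
        Xbatch = Xtrain[j * batch_sz:(j + 1) * batch_sz]
        Tbatch = Ttrain[j * batch_sz:(j + 1) * batch_sz]
        train(Xbatch, Tbatch)
        if j % print_period == 0:
            cost_val, prediction_val = get_prediction(Xtest, Ttest)
            costs.append(cost_val)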
Example #40
 def negative_log_likelihood_sum(self, y):
     return -T.sum(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y])
Example #41
 def kld(self, mu, var):
     return 0.5 * T.sum(1 + T.log(var) - mu**2 - var, axis=1)
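With var = σ², this is the (negated) closed-form KL term that appears in the variational lower bound:

\[
-\mathrm{KL}\bigl(\mathcal{N}(\mu, \sigma^2)\,\|\,\mathcal{N}(0, 1)\bigr)
  = \frac{1}{2}\sum_i \bigl(1 + \log\sigma_i^2 - \mu_i^2 - \sigma_i^2\bigr).
\]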
Example #42
def LME(x, axis=None, dtype=None, keepdims=False, acc_dtype=None):
    return T.log(T.mean(T.exp(x), axis, dtype, keepdims, acc_dtype))
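T.exp can overflow for large inputs; a max-shifted variant (an addition, not from the original source) computes the same log-mean-exp without that risk:

import theano.tensor as T

def LME_stable(x, axis=None, keepdims=False):
    # log-mean-exp with the usual max shift, so T.exp never sees values > 0
    x_max = T.max(x, axis=axis, keepdims=True)
    return (T.log(T.mean(T.exp(x - x_max), axis=axis, keepdims=keepdims))
            + T.max(x, axis=axis, keepdims=keepdims))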
Example #43
 def f_softplus(x):
     return T.log(T.exp(x) + 1)  # - np.log(2)
Example #44
 def cost_nll(self, pred, label):
     cost = -T.log(pred) * label
     cost = T.mean(T.sum(cost, axis=1))
     return cost
    def __init__(self, We_initial, params):

        if params.maxval:
            self.nout = params.maxval - params.minval + 1

        if params.traintype == "reg" or params.traintype == "rep":
            p = cPickle.load(file(params.regfile, 'rb'))
            print p  # contains We

        if params.traintype == "reg":
            print "regularizing to parameters"

        if params.traintype == "rep":
            print "not updating embeddings"

        #params
        initial_We = theano.shared(np.asarray(We_initial, dtype=config.floatX))
        We = theano.shared(np.asarray(We_initial, dtype=config.floatX))

        if params.traintype == "reg":
            initial_We = theano.shared(
                np.asarray(p[0].get_value(), dtype=config.floatX))
            We = theano.shared(
                np.asarray(p[0].get_value(), dtype=config.floatX))

        if params.traintype == "rep":
            We = theano.shared(
                np.asarray(p[0].get_value(), dtype=config.floatX))

        g1batchindices = T.imatrix()
        g2batchindices = T.imatrix()
        g1mask = T.matrix()
        g2mask = T.matrix()
        scores = T.matrix()

        l_in = lasagne.layers.InputLayer((None, None, 1))
        l_mask = lasagne.layers.InputLayer(shape=(None, None))
        l_emb = lasagne.layers.EmbeddingLayer(
            l_in,
            input_size=We.get_value().shape[0],
            output_size=We.get_value().shape[1],
            W=We)
        l_out = lasagne_average_layer([l_emb, l_mask])

        embg1 = lasagne.layers.get_output(l_out, {
            l_in: g1batchindices,
            l_mask: g1mask
        })
        embg2 = lasagne.layers.get_output(l_out, {
            l_in: g2batchindices,
            l_mask: g2mask
        })

        g1_dot_g2 = embg1 * embg2
        g1_abs_g2 = abs(embg1 - embg2)

        lin_dot = lasagne.layers.InputLayer((None, We.get_value().shape[1]))
        lin_abs = lasagne.layers.InputLayer((None, We.get_value().shape[1]))
        l_sum = lasagne.layers.ConcatLayer([lin_dot, lin_abs])
        l_sigmoid = lasagne.layers.DenseLayer(
            l_sum, params.memsize, nonlinearity=lasagne.nonlinearities.sigmoid)
        if params.task == "sim":
            l_softmax = lasagne.layers.DenseLayer(l_sigmoid,
                                                  self.nout,
                                                  nonlinearity=T.nnet.softmax)
            X = lasagne.layers.get_output(l_softmax, {
                lin_dot: g1_dot_g2,
                lin_abs: g1_abs_g2
            })
            Y = T.log(X)

            cost = scores * (T.log(scores) - Y)
            cost = cost.sum(axis=1) / (float(self.nout))

            prediction = 0.
            i = params.minval
            while i <= params.maxval:
                prediction = prediction + i * X[:, i - 1]
                i += 1
        elif params.task == "ent":
            l_softmax = lasagne.layers.DenseLayer(l_sigmoid,
                                                  3,
                                                  nonlinearity=T.nnet.softmax)
            X = lasagne.layers.get_output(l_softmax, {
                lin_dot: g1_dot_g2,
                lin_abs: g1_abs_g2
            })

            cost = theano.tensor.nnet.categorical_crossentropy(X, scores)

            prediction = T.argmax(X, axis=1)
        else:
            raise ValueError('Params.task not set correctly.')

        # if params.l_out == '':
        #     lasagne.layers.set_all_param_values(l_out, s)
        #
        # if params.l_so
        self.network_params = lasagne.layers.get_all_params(
            l_out, trainable=True) + lasagne.layers.get_all_params(
                l_softmax, trainable=True)
        self.network_params.pop(0)
        self.all_params = lasagne.layers.get_all_params(
            l_out, trainable=True) + lasagne.layers.get_all_params(
                l_softmax, trainable=True)

        reg = self.getRegTerm(params, We, initial_We)
        self.trainable = self.getTrainableParams(params)
        cost = T.mean(cost) + reg

        self.feedforward_function = theano.function([g1batchindices, g1mask],
                                                    embg1)

        self.scoring_function = theano.function(
            [g1batchindices, g2batchindices, g1mask, g2mask], prediction)
        self.cost_function = theano.function(
            [scores, g1batchindices, g2batchindices, g1mask, g2mask], cost)

        grads = theano.gradient.grad(cost, self.trainable)
        if params.clip:
            grads = [
                lasagne.updates.norm_constraint(grad, params.clip,
                                                range(grad.ndim))
                for grad in grads
            ]
        updates = params.learner(grads, self.trainable, params.eta)
        self.train_function = theano.function(
            [scores, g1batchindices, g2batchindices, g1mask, g2mask],
            cost,
            updates=updates)
Example #46
 def multivariate_bernoulli(self, y_pred, y_true):
     return T.sum(y_true * T.log(y_pred) + (1 - y_true) * T.log(1 - y_pred),
                  axis=1)
Example #47
    def _compute_losses(self, model_output):
        # model_output.shape : (batch_size, seq_len, K, M, target_size)
        # self.dataset.symb_targets.shape = (batch_size, seq_len+K-1, target_dims)

        # mask.shape : (batch_size, seq_len) or None
        mask = self.dataset.symb_mask

        # mu.shape = (batch_size, seq_len, K, M, target_dims)
        mu = model_output[:, :, :, :, 0:3]

        # sigma.shape = (batch_size, seq_len, K, M, target_dims)
        sigma = model_output[:, :, :, :, 3:6]

        # Stack K targets for each input (sliding window style)
        # targets.shape = (batch_size, seq_len, K, target_dims)
        targets = T.stack([
            self.dataset.symb_targets[:, i:(-self.model.k + i + 1) or None]
            for i in range(self.model.k)
        ],
                          axis=2)

        # Add new axis for sum over M
        # targets.shape = (batch_size, seq_len, K, 1, target_dims)
        targets = targets[:, :, :, None, :]

        # For monitoring the L2 error of using $mu$ as the predicted direction (should be comparable to MICCAI's work).
        normalized_mu = mu[:, :, 0, 0] / l2distance(
            mu[:, :, 0, 0], keepdims=True, eps=1e-8)
        normalized_targets = targets[:, :, 0, 0] / l2distance(
            targets[:, :, 0, 0], keepdims=True, eps=1e-8)
        self.L2_error_per_item = T.sqrt(
            T.sum(((normalized_mu - normalized_targets)**2), axis=2))
        if mask is not None:
            self.mean_sqr_error = T.sum(self.L2_error_per_item * mask,
                                        axis=1) / T.sum(mask, axis=1)
        else:
            self.mean_sqr_error = T.mean(self.L2_error_per_item, axis=1)

        # Likelihood of multivariate gaussian (n dimensions) is :
        # ((2 \pi)^D |\Sigma|)^{-1/2} exp(-1/2 (x - \mu)^T \Sigma^-1 (x - \mu))
        # We suppose a diagonal covariance matrix, so we have :
        #   => |\Sigma| = \prod_n \sigma_n^2
        #   => (x - \mu)^T \Sigma^-1 (x - \mu) = \sum_n ((x_n - \mu_n) / \sigma_n)^2
        m_log_likelihoods = -np.float32(
            (self.target_dims / 2.) * np.log(2 * np.pi)) + T.sum(
                -T.log(sigma) - 0.5 * T.sqr((targets - mu) / sigma), axis=4)

        # k_losses_per_timestep.shape : (batch_size, seq_len, K)
        self.k_losses_per_timestep = T.log(self.m) - logsumexp(
            m_log_likelihoods, axis=3, keepdims=False)

        # loss_per_timestep.shape : (batch_size, seq_len)
        self.loss_per_time_step = T.mean(self.k_losses_per_timestep, axis=2)

        # Average over sequence steps.
        # k_nlls_per_seq.shape :(batch_size, K)
        if mask is not None:
            self.k_losses_per_seq = T.sum(
                self.k_losses_per_timestep * mask[:, :, None], axis=1) / T.sum(
                    mask, axis=1, keepdims=True)
        else:
            self.k_losses_per_seq = T.mean(self.k_losses_per_timestep, axis=1)

        # Average over K
        # loss_per_seq.shape :(batch_size,)
        self.loss_per_seq = T.mean(self.k_losses_per_seq, axis=1)
        return self.loss_per_seq
Example #48
    def factors(self, x, z, A):

        v = self.v
        w = self.w
        '''
        z is unused
        x['x'] is the data
        
        The names of dict z[...] may be confusing here: the latent variable z is not included in the dict z[...],
        but implicitly computed from epsilon and parameters in w.

        z is computed with g(.) from eps and variational parameters
        let logpx be the generative model density: log p(x|z) where z=g(.)
        let logpz be the prior of Z plus the entropy of q(z|x): logp(z) + H_q(z|x)
        So the lower bound L(x) = logpx + logpz
        
        let logpv and logpw be the (prior) density of the parameters
        '''
        def f_softplus(x):
            return T.log(T.exp(x) + 1)  # - np.log(2)

        def f_rectlin(x):
            return x * (x > 0)

        def f_rectlin2(x):
            return x * (x > 0) + 0.01 * x

        nonlinear = {
            'tanh': T.tanh,
            'sigmoid': T.nnet.sigmoid,
            'softplus': f_softplus,
            'rectlin': f_rectlin,
            'rectlin2': f_rectlin2
        }
        nonlinear_q = nonlinear[self.nonlinear_q]
        nonlinear_p = nonlinear[self.nonlinear_p]

        #rng = rng_curand.CURAND_RandomStreams(0)
        import theano.tensor.shared_randomstreams
        rng = theano.tensor.shared_randomstreams.RandomStreams(0)

        # Compute q(z|x,y)
        #
        # it seems that z = f(v['w0x'] * x + v['w0y'] * y + b)
        #
        hidden_q = [
            nonlinear_q(
                T.dot(v['w0x'], x['x']) + T.dot(v['w0y'], x['y']) +
                T.dot(v['b0'], A))
        ]
        for i in range(1, len(self.n_hidden_q)):
            hidden_q.append(
                nonlinear_q(
                    T.dot(v['w' + str(i)], hidden_q[-1]) +
                    T.dot(v['b' + str(i)], A)))

        q_mean = T.dot(v['mean_w'], hidden_q[-1]) + T.dot(v['mean_b'], A)
        if self.type_qz == 'gaussian' or self.type_qz == 'gaussianmarg':
            q_logvar = T.dot(v['logvar_w'], hidden_q[-1]) + T.dot(
                v['logvar_b'], A)
        else:
            raise Exception()

        # function for distribution q(z|x)
        theanofunc = lazytheanofunc('warn', mode='FAST_RUN')
        self.dist_qz['z'] = theanofunc([x['x'], x['mean_prior'], x['y']] + [A],
                                       [q_mean, q_logvar])

        # Compute virtual sample
        eps = rng.normal(size=q_mean.shape, dtype='float32')
        _z = q_mean + T.exp(0.5 * q_logvar) * eps

        # Compute log p(x|z)
        #
        # log p(x | z, y)
        # It seems that x = f((w0y * y + w0z * z) + b0)
        #
        hidden_p = [
            nonlinear_p(
                T.dot(w['w0y'], x['y']) + T.dot(w['w0z'], _z) +
                T.dot(w['b0'], A))
        ]
        for i in range(1, len(self.n_hidden_p)):
            hidden_p.append(
                nonlinear_p(
                    T.dot(w['w' + str(i)], hidden_p[-1]) +
                    T.dot(w['b' + str(i)], A)))
            if self.dropout:
                hidden_p[-1] *= 2. * (rng.uniform(size=hidden_p[-1].shape,
                                                  dtype='float32') > .5)

        if self.type_px == 'bernoulli':
            p = T.nnet.sigmoid(
                T.dot(w['out_w'], hidden_p[-1]) + T.dot(w['out_b'], A))
            _logpx = -T.nnet.binary_crossentropy(p, x['x'])
            self.dist_px['x'] = theanofunc([x['y'], _z] + [A], p)
        elif self.type_px == 'gaussian':
            x_mean = T.dot(w['out_w'], hidden_p[-1]) + T.dot(w['out_b'], A)
            x_logvar = T.dot(w['out_logvar_w'], hidden_p[-1]) + T.dot(
                w['out_logvar_b'], A)
            _logpx = ap.logpdfs.normal2(x['x'], x_mean, x_logvar)
            self.dist_px['x'] = theanofunc([x['y'], _z] + [A],
                                           [x_mean, x_logvar])
        elif self.type_px == 'laplace':
            x_mean = T.dot(w['out_w'], hidden_p[-1]) + T.dot(w['out_b'], A)
            x_logvar = T.dot(w['out_logvar_w'], hidden_p[-1]) + T.dot(
                w['out_logvar_b'], A)
            _logpx = ap.logpdfs.laplace(x['x'], x_mean, x_logvar)
            self.dist_px['x'] = theanofunc([x['y'], _z] + [A],
                                           [x_mean, x_logvar])

        else:
            raise Exception("")

        # Note: logpx is a row vector (one element per sample)
        logpx = T.dot(shared32(np.ones((1, self.n_x))),
                      _logpx)  # logpx = log p(x|z,w)

        # log p(y) (prior of y)
        #_logpy = w['logpy']
        #if self.uniform_y: _logpy *= 0
        #py_model = T.nnet.softmax(T.dot(_logpy, A).T).T
        #logpy = (- T.nnet.categorical_crossentropy(py_model.T, x['y'].T).T).reshape((1,-1))
        #logpx += logpy
        #self.dist_px['y'] = theanofunc([A], py_model)

        # log p(z) (prior of z)
        #
        # E_q[log(p(z))]
        #
        if self.type_pz == 'gaussianmarg':
            logpz = -0.5 * (np.log(2 * np.pi) + (
                (q_mean - x['mean_prior'])**2 + T.exp(q_logvar))).sum(
                    axis=0, keepdims=True)
        elif self.type_pz == 'gaussian':
            logpz = ap.logpdfs.standard_normal(_z).sum(axis=0, keepdims=True)
        elif self.type_pz == 'mog':
            pz = 0
            for i in range(self.n_mixture):
                pz += T.exp(
                    ap.logpdfs.normal2(_z, T.dot(w['mog_mean' + str(i)], A),
                                       T.dot(w['mog_logvar' + str(i)], A)))
            logpz = T.log(pz).sum(axis=0, keepdims=True) - self.n_z * np.log(
                float(self.n_mixture))
        elif self.type_pz == 'laplace':
            logpz = ap.logpdfs.standard_laplace(_z).sum(axis=0, keepdims=True)
        elif self.type_pz == 'studentt':
            logpz = ap.logpdfs.studentt(_z, T.dot(T.exp(w['logv']),
                                                  A)).sum(axis=0,
                                                          keepdims=True)
        else:
            raise Exception("Unknown type_pz")

        # loq q(z|x) (entropy of z)
        #
        # E_q[-log(q)]
        #
        if self.type_qz == 'gaussianmarg':
            logqz = -0.5 * (np.log(2 * np.pi) + 1 + q_logvar).sum(
                axis=0, keepdims=True)
        elif self.type_qz == 'gaussian':
            logqz = ap.logpdfs.normal2(_z, q_mean, q_logvar).sum(axis=0,
                                                                 keepdims=True)
        else:
            raise Exception()

        # Note: logpv and logpw are a scalars
        def f_prior(_w, prior_sd=self.prior_sd):
            return ap.logpdfs.normal(_w, 0, prior_sd).sum()

        logpv = 0
        logpv += f_prior(v['w0x'])
        logpv += f_prior(v['w0y'])
        for i in range(1, len(self.n_hidden_q)):
            logpv += f_prior(v['w' + str(i)])
        logpv += f_prior(v['mean_w'])
        if self.type_qz in ['gaussian', 'gaussianmarg']:
            logpv += f_prior(v['logvar_w'])

        logpw = 0
        logpw += f_prior(w['w0y'])
        logpw += f_prior(w['w0z'])
        for i in range(1, len(self.n_hidden_p)):
            logpw += f_prior(w['w' + str(i)])
        logpw += f_prior(w['out_w'])
        if self.type_px in ['sigmoidgaussian', 'gaussian', 'laplace']:
            logpw += f_prior(w['out_logvar_w'])
        if self.type_pz == 'studentt':
            logpw += f_prior(w['logv'])

        #return logpv, logpw, logpx, logpz, logqz
        return logpx, logpz, logqz
Example #49
 def KL(self, y):
     return T.mean(y * T.log(y / self.y_pred) +
                   (1 - y) * T.log((1 - y) / (1 - self.y_pred)))
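Elementwise, this is the KL divergence between Bernoulli distributions with parameters y and ŷ = y_pred, averaged over all entries:

\[
\mathrm{KL}\bigl(\mathrm{Bern}(y)\,\|\,\mathrm{Bern}(\hat y)\bigr)
  = y\log\frac{y}{\hat y} + (1 - y)\log\frac{1 - y}{1 - \hat y} .
\]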
x = T.dmatrix("x")
y = T.dvector("y")
learning_rate = T.dscalar("lr")

# declare the weight w and b
w = theano.shared(value=numpy.random.rand(feat), name="w")
b = theano.shared(value=0., name="b")

print("initialized weights \n")
print(w.get_value())
print(b.get_value())

# build the graph
output = 1/(1+T.exp(-T.dot(x, w)-b))
prediction = output > 0.5
cross_entropy = -y * T.log(output) - (1-y)*T.log(1-output)
loss = cross_entropy.mean() + 0.01*(w**2).sum()
gradW, gradb = T.grad(loss, [w, b])

# train function
train = theano.function(inputs=[x,y,learning_rate], outputs=[prediction, cross_entropy,loss, learning_rate], \
                        updates=((w,w-learning_rate*gradW), (b,b-learning_rate*gradb)))
# predict function
predict = theano.function(inputs=[x], outputs=prediction)

for i in range(training_step):
    if (i < 1000):
        learning_rate = 0.1
    else:
        learning_rate =0.01
    pred, cro, l,lr = train(D[0], D[1], learning_rate)
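The snippet assumes feat, training_step and the dataset tuple D were defined earlier; a minimal synthetic setup in the spirit of the classic Theano logistic-regression example (the values are assumptions) would be:

import numpy

N = 400                         # number of examples
feat = 784                      # number of input features
training_step = 10000
D = (numpy.random.randn(N, feat),
     numpy.random.randint(low=0, high=2, size=N))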
Example #51
File: util.py Project: tinyloop/dtp
def NLL(probs, labels) : # labels are not one-hot code 
    return - T.mean( T.log(probs)[T.arange(labels.shape[0]), T.cast(labels,'int32')] )
Example #52
 def loss(self,delta):
     #return T.log(1+T.exp(euclid(self.output1,self.output2)))
     #return T.log(1+T.exp(T.sqrt(T.sum(T.sqr(self.output1-self.output2)))))
     #return T.log(1+T.exp(T.sqrt(T.sum(T.sqr(self.output1)))))
     return T.log(1+T.exp(delta*(T.sum(T.sqr(self.output1-self.output2)))))
Example #53
    def trainMB(self, V_egMin, noOfEpoch, noOfMiniBatchEx):
        """
        Trains the current RBM object; parameter updates are applied internally and nothing is returned.

        args:
        V_egMin (theano.shared 2D array): call eval() to supply as argument. Rows of this are input examples; V_egMin[N:M] extracts M-N examples, each of size noOfVisible units
        noOfEpoch (int): total number of epochs to simulate; each epoch goes through V_egMin
        noOfMiniBatchEx (int): number of examples to be grouped into minibatches
        
        """
        self.miniBatchSize = noOfMiniBatchEx
        print("size of input example is: " + str(V_egMin.shape))
        V_egM = T.matrix(name="T_egM", dtype=theano.config.floatX)
        [V_CDmAcc, H_CDmAcc, H_CDmean, V_CDmean] , scan_updates = theano.scan(self.vtovMBall, outputs_info=[V_egM, None, None, None] , n_steps=self.CD_n)
        V_CDm = V_CDmAcc[-1] #these are matrixes
        H_CDm = H_CDmAcc[-1] #these are matrixes
        
       
        H_egM = self.vtohMB(V_egM)
        energyVector_eg = self.energyFnMB(V_egM, H_egM)
        energyVector_cd = self.energyFnMB(V_CDm, H_CDm)
        costFn = T.mean(energyVector_eg, dtype=theano.config.floatX, acc_dtype=theano.config.floatX) - T.mean(energyVector_cd, dtype=theano.config.floatX, acc_dtype=theano.config.floatX) 
        
        Ta_grad, Tb_grad, Tz_grad, Tomg_grad = T.grad(cost=costFn,
                                                        wrt=[self.T_a, self.T_b, self.T_z, self.T_omega],
                                                        consider_constant=[V_egM, H_egM, V_CDm, H_CDm])
        
        #regular gradient
        gradFromMB = theano.function(inputs=[V_egM], outputs=[Ta_grad, Tb_grad, Tz_grad, Tomg_grad], 
                                     allow_input_downcast=True, 
                                     updates = scan_updates + [(self.T_a, self.T_a + self.aRate*Ta_grad),
                                                               (self.T_b, self.T_b + self.bRate*Tb_grad),
                                                               (self.T_z, self.T_z + self.sigmaRate*Tz_grad),
                                                               (self.T_omega, self.T_omega + self.omegaRate*Tomg_grad)],
                                     mode='FAST_RUN')#NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True))
        
        #rprop: Code not used
        Ta_rpropMag = T.mul(T.abs_(self.Ta_grad_prev), T.mul(self.T_posUpdate, T.abs_(T.sgn(self.Ta_grad_prev)+T.sgn(Ta_grad))) + 
                            T.mul(self.T_negUpdate, T.abs_(T.abs_(T.sgn(self.Ta_grad_prev)+T.sgn(Ta_grad))-np.float32(2.0))))      
        Ta_rprop = T.mul(T.sgn(Ta_grad),Ta_rpropMag.clip(np.float32(self.epsilon),50))
        Tb_rpropMag = T.mul(T.abs_(self.Tb_grad_prev), T.mul(self.T_posUpdate, T.abs_(T.sgn(self.Tb_grad_prev)+T.sgn(Tb_grad))) + 
                            T.mul(self.T_negUpdate, T.abs_(T.abs_(T.sgn(self.Tb_grad_prev)+T.sgn(Tb_grad))-np.float32(2.0))))      
        Tb_rprop = T.mul(T.sgn(Tb_grad),Tb_rpropMag.clip(np.float32(self.epsilon),50))
        Tz_rpropMag = T.mul(T.abs_(self.Tz_grad_prev), T.mul(self.T_posUpdate, T.abs_(T.sgn(self.Tz_grad_prev)+T.sgn(Tz_grad))) + 
                            T.mul(self.T_negUpdate, T.abs_(T.abs_(T.sgn(self.Tz_grad_prev)+T.sgn(Tz_grad))-np.float32(2.0))) )     
        Tz_rprop = T.mul(T.sgn(Tz_grad),Tz_rpropMag.clip(np.float32(self.epsilon),50))
        Tomg_rpropMag = T.mul(T.abs_(self.Tomg_grad_prev), T.mul(self.T_posUpdate, T.abs_(T.sgn(self.Tomg_grad_prev)+T.sgn(Tomg_grad))) + 
                            T.mul(self.T_negUpdate, T.abs_(T.abs_(T.sgn(self.Tomg_grad_prev)+T.sgn(Tomg_grad))-np.float32(2.0))))      
        Tomg_rprop = T.mul(T.sgn(Tomg_grad),Tomg_rpropMag.clip(np.float32(self.epsilon),50)) 
        gradFromMBrprop = theano.function(inputs=[V_egM], outputs=[Ta_rprop, Tb_rprop, Tz_rprop, Tomg_rprop], 
                                     allow_input_downcast=True, 
                                     updates = scan_updates + [(self.T_a, self.T_a + Ta_rprop),
                                                               (self.T_b, self.T_b + Tb_rprop),
                                                               (self.T_z, self.T_z + Tz_rprop),
                                                               (self.T_omega, self.T_omega + Tomg_rprop),
                                                               (self.Ta_grad_prev, Ta_rprop),
                                                               (self.Tb_grad_prev, Tb_rprop),
                                                               (self.Tz_grad_prev, Tz_rprop),
                                                               (self.Tomg_grad_prev, Tomg_rprop)],
                                     mode='FAST_RUN')#NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True))
        
        #RMSprop only (no sparsity penalty): 
        [a_grad, b_grad, z_grad, omg_grad] = gradFromMB(V_egMin[0:noOfMiniBatchEx]) #seed the RMS estimates from the first mini-batch (this call also applies one plain gradient update)
        if (not(self.parameterLoaded) and not(self.parameterSaved)):
            self.Ta_rms.set_value(np.float32(np.abs(a_grad))) # =  theano.shared(value = np.float32(np.abs(a_grad)), name = 'Ta_rms', borrow=True, allow_downcast=True)
        Tb_rms =  theano.shared(value = np.float32(np.abs(b_grad)), name = 'Tb_rms', borrow=True, allow_downcast=True)
        Tz_rms =  theano.shared(value = np.float32(np.abs(z_grad)), name = 'Tz_rms', borrow=True, allow_downcast=True)
        Tomg_rms =  theano.shared(value = np.float32(np.abs(omg_grad)), name = 'Tomg_rms', borrow=True, allow_downcast=True)
        gradFromMBRMSprop = theano.function(inputs=[V_egM], outputs=[Ta_grad, Tb_grad, Tz_grad, Tomg_grad], 
                                     allow_input_downcast=True, 
                                     updates = scan_updates + [(self.Ta_rms, T.sqrt(T.mul(np.float32(0.9),T.mul(self.Ta_rms,self.Ta_rms))+T.mul(np.float32(0.1),T.mul(Ta_grad,Ta_grad)))),
                                                               (Tb_rms, T.sqrt(T.mul(np.float32(0.9),T.mul(Tb_rms,Tb_rms))+T.mul(np.float32(0.1),T.mul(Tb_grad,Tb_grad)))),
                                                               (Tz_rms, T.sqrt(T.mul(np.float32(0.9),T.mul(Tz_rms,Tz_rms))+T.mul(np.float32(0.1),T.mul(Tz_grad,Tz_grad)))),
                                                               (Tomg_rms, T.sqrt(T.mul(np.float32(0.9),T.mul(Tomg_rms,Tomg_rms))+T.mul(np.float32(0.1),T.mul(Tomg_grad,Tomg_grad)))),
                                                               (self.T_a, self.T_a + self.aRate*T.mul(Ta_grad,T.maximum(np.float32(self.epsilon),self.Ta_rms)**-1)),
                                                               (self.T_b, self.T_b + self.bRate*T.mul(Tb_grad,T.maximum(np.float32(self.epsilon),Tb_rms)**-1)),
                                                               (self.T_z, self.T_z + self.sigmaRate*T.mul(Tz_grad,T.maximum(np.float32(self.epsilon),Tz_rms)**-1)),
                                                               (self.T_omega, self.T_omega + self.omegaRate*T.mul(Tomg_grad,T.maximum(np.float32(self.epsilon),Tomg_rms)**-1))],
                                                                 mode='FAST_RUN')#NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True))  
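        # Each *_rms shared variable tracks a running root-mean-square of its gradient,
        # rms <- sqrt(0.9*rms^2 + 0.1*g^2); dividing the gradient by max(epsilon, rms)
        # gives the per-parameter step normalisation of RMSprop.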
        
        #sparse hidden units optimization + RMSprop:   
        #first calculate probability of hidden units firing given visible examples:
        aVomg = T.dot(T.mul(T.fill(V_egM, T.exp(-self.T_z)), V_egM), self.T_omega)
        aT_Hp = T.nnet.sigmoid(T.fill(aVomg, self.T_b) + aVomg)#T.nnet.ultra_fast_sigmoid() did not work for us 
        aT_HpMean = T.mean(aT_Hp) # mean activation over minibatch and all Hk
        #cross entropy between mean hidden unit activation and target mean activation probability "self.sparseTargetp" 
        sparseHcost = T.mul(np.float32(-self.sparseTargetp), T.log(aT_HpMean)) - T.mul((np.float32(1.0)-self.sparseTargetp), T.log(np.float32(1.0)-aT_HpMean))
        
        Tb_gradH, Tz_gradH, Tomg_gradH = T.grad(cost=sparseHcost,
                                                        wrt=[self.T_b, self.T_z, self.T_omega],
                                                        consider_constant=[V_egM])
        sparseGradFn = theano.function(inputs = [V_egM], outputs =[Tb_gradH, Tz_gradH, Tomg_gradH], allow_input_downcast=True, mode = 'FAST_RUN')
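        # gradSparseH below subtracts these sparsity gradients from the contrastive-divergence
        # gradients (e.g. Tb_grad - Tb_gradH) before the RMS-normalised update, so the penalty
        # pushes the mean hidden activation towards the target sparseTargetp.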
        
        [b_gradH, z_gradH, omg_gradH] = sparseGradFn(V_egMin[0:noOfMiniBatchEx]) #seed the sparsity-corrected RMS estimates from the first mini-batch
        
        if (not(self.parameterLoaded) and not(self.parameterSaved)):
            self.Tb_rmsH.set_value(np.float32(np.abs(b_grad - b_gradH))) 
            self.Tz_rmsH.set_value(np.float32(np.abs(z_grad - z_gradH))) 
            self.Tomg_rmsH.set_value(np.float32(np.abs(omg_grad - omg_gradH))) 
        gradSparseH = theano.function(inputs=[V_egM], outputs=[Ta_grad, Tb_grad, Tz_grad, Tomg_grad, Tb_gradH, Tz_gradH, Tomg_gradH], 
                                     allow_input_downcast=True, 
                                     updates = scan_updates + [(self.Ta_rms, T.sqrt(T.mul(np.float32(0.9),T.mul(self.Ta_rms,self.Ta_rms))+T.mul(np.float32(0.1),T.mul(Ta_grad,Ta_grad)))),
                                                               (self.Tb_rmsH, T.sqrt(T.mul(np.float32(0.9),T.mul(self.Tb_rmsH,self.Tb_rmsH))+T.mul(np.float32(0.1),T.mul(Tb_grad-Tb_gradH,Tb_grad-Tb_gradH)))),
                                                               (self.Tz_rmsH, T.sqrt(T.mul(np.float32(0.9),T.mul(self.Tz_rmsH,self.Tz_rmsH))+T.mul(np.float32(0.1),T.mul(Tz_grad-Tz_gradH,Tz_grad-Tz_gradH)))),
                                                               (self.Tomg_rmsH, T.sqrt(T.mul(np.float32(0.9),T.mul(self.Tomg_rmsH,self.Tomg_rmsH))+T.mul(np.float32(0.1),T.mul(Tomg_grad-Tomg_gradH,Tomg_grad-Tomg_gradH)))),
                                                               (self.T_a, self.T_a + self.aRate*T.mul(Ta_grad,T.maximum(np.float32(self.epsilon),self.Ta_rms)**-1)),
                                                               (self.T_b, self.T_b + self.bRate*T.mul(Tb_grad-Tb_gradH,T.maximum(np.float32(self.epsilon),self.Tb_rmsH)**-1)),
                                                               (self.T_z, self.T_z + self.sigmaRate*T.mul(Tz_grad-Tz_gradH,T.maximum(np.float32(self.epsilon),self.Tz_rmsH)**-1)),
                                                               (self.T_omega, self.T_omega + self.omegaRate*T.mul(Tomg_grad-Tomg_gradH,T.maximum(np.float32(self.epsilon),self.Tomg_rmsH)**-1))],
                                     mode='FAST_RUN')#NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True)) 
        
        #reconstruction errors:
        [V_egM_recon, H_egM_reconStub, H_meanStubC, V_meanStubC] = self.vtovMBall(V_egM)
        V_error = V_egM - V_egM_recon
        V_errorSqr = T.mul(V_error, V_error)
        reconError = theano.function(inputs = [V_egM], outputs = [T.mean(T.sum(V_errorSqr,axis=1, acc_dtype=theano.config.floatX), acc_dtype=theano.config.floatX)], 
                                     allow_input_downcast=True,
                                     mode='FAST_RUN')
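        # reconError returns the mean over the mini-batch of the per-example squared
        # reconstruction error (summed over all visible units).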

        print("***************************************************************************************************")
        print("training network with " + str(self.nv) + " real visible units and " + str(self.nh) + " binary hidden units")
        print("reconstruction error before training = " + str(np.array(reconError(V_egMin))[0]))
        noOfMiniBatches = np.int(len(V_egMin)/noOfMiniBatchEx)
        print("number of mini-batches = " + str(noOfMiniBatches) + ", with " + str(noOfMiniBatchEx) + " examples per mini-batch")
        print("number of Epochs = " + str(noOfEpoch))
        print("***************************************************************************************************")        

        #the input images are already shuffled so that consecutive images belong to different classes, so contiguous slices can be used directly as mini-batches.
        for j in xrange(noOfEpoch):
            pretime=time.time()
            for i in xrange(noOfMiniBatches):
                [a_upDate, b_upDate, z_upDate, omg_upDate, b_upDateH, z_upDateH, omg_upDateH] = gradSparseH(V_egMin[i*noOfMiniBatchEx:(i+1)*noOfMiniBatchEx])
                
            myErr = reconError(V_egMin)
            self.likelihood4plot = self.likelihood4plot + [np.float32(myErr)]
            print("epoch " + str(j) + ": reconstruction error = " + str(myErr[0])  + ", time taken = " + str(time.time() - pretime))

        print("\n***************************************************************************************************") 
        print("reconstruction error after training for " + str(noOfEpoch) + " epochs = " + str(np.array(reconError(V_egMin))[0]))
        self.checkNaN()
        print("***************************************************************************************************")         
        
        plt.figure()
        plt.plot(np.arange(0.0, len(self.likelihood4plot), 1), self.likelihood4plot)
        plt.show()
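
# A minimal NumPy sketch (not part of the class above; all names below are
# illustrative) of the RMSprop-style rule used in gradFromMBRMSprop/gradSparseH:
# the running RMS of each gradient follows rms <- sqrt(0.9*rms^2 + 0.1*g^2),
# and the update divides the gradient by max(epsilon, rms).
import numpy as np

def rmsprop_step(param, grad, rms, rate=1e-3, epsilon=1e-6, decay=0.9):
    # update the running RMS estimate, then take an RMS-normalised step
    rms = np.sqrt(decay * rms ** 2 + (1.0 - decay) * grad ** 2)
    param = param + rate * grad / np.maximum(epsilon, rms)
    return param, rms

# toy usage: the RMS estimate is seeded from |g|, as in the code above
w = np.zeros(3, dtype=np.float32)
g = np.array([0.5, -1.0, 2.0], dtype=np.float32)
r = np.abs(g)
for _ in range(10):
    w, r = rmsprop_step(w, g, r)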
Example #54
0
    def CrossEntropy(self, y):
        return -T.mean(y * T.log(self.y_pred))
Example #55
0
training_epochs = 25
learning_rate = 0.1
batch_size = 128

W1 = init_weights(28 * 28, 900)
b1 = init_bias(900)
b1_prime = init_bias(28 * 28)
W1_prime = W1.transpose()
W2 = init_weights(900, 10)
b2 = init_bias(10)

tilde_x = theano_rng.binomial(
    size=x.shape, n=1, p=1 - corruption_level, dtype=theano.config.floatX) * x
y1 = T.nnet.sigmoid(T.dot(tilde_x, W1) + b1)
z1 = T.nnet.sigmoid(T.dot(y1, W1_prime) + b1_prime)
cost1 = -T.mean(T.sum(x * T.log(z1) + (1 - x) * T.log(1 - z1), axis=1))

params1 = [W1, b1, b1_prime]
grads1 = T.grad(cost1, params1)
updates1 = [(param1, param1 - learning_rate * grad1)
            for param1, grad1 in zip(params1, grads1)]
train_da1 = theano.function(inputs=[x],
                            outputs=cost1,
                            updates=updates1,
                            allow_input_downcast=True)

p_y2 = T.nnet.softmax(T.dot(y1, W2) + b2)
y2 = T.argmax(p_y2, axis=1)
cost2 = T.mean(T.nnet.categorical_crossentropy(p_y2, d))

params2 = [W1, b1, W2, b2]
Example #56
0
    def negative_log_likelihood(self, y):
        return -T.mean(T.log(self.y_t)[:, y])
Example #57
0
    def get_tester(self, y):
        return self.inp, T.log(self.p_y_given_x)[T.arange(y.shape[0]), y]
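
# A small NumPy sketch (illustrative only; log_p and y are made-up values) of the
# indexing pattern used in get_tester above and in the log-likelihood costs below:
# log_p[arange(n), y] picks, for each row, the log-probability of that row's target class.
import numpy as np

log_p = np.log(np.array([[0.7, 0.2, 0.1],
                         [0.1, 0.8, 0.1]]))
y = np.array([0, 1])                              # target class for each example
per_example = log_p[np.arange(y.shape[0]), y]     # [log 0.7, log 0.8]
nll = -np.mean(per_example)                       # mean negative log-likelihood
print(nll)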
Example #58
0
def build_model(alpha, beta, tparams, options):
    trng = RandomStreams(SEED)

    # Used for dropout.
    use_noise = theano.shared(numpy_floatX(0.))

    x_zheng = tensor.matrix('x_zheng', dtype='int32')
    x_zheng_mask = tensor.matrix('x_zheng_mask', dtype=config.floatX)
    x_ni = tensor.matrix('x_ni', dtype='int32')
    x_ni_mask = tensor.matrix('x_ni_mask', dtype=config.floatX)
    y = tensor.vector('y', dtype='int32')

    n_timesteps = x_zheng.shape[0]
    n_samples = x_zheng.shape[1]

    emb_zheng = tparams['Wemb'][x_zheng.flatten()].reshape(
        [n_timesteps, n_samples, options['dim_proj']])

    proj1 = get_layer(options['encoder'])[1](tparams,
                                             emb_zheng,
                                             options,
                                             prefix='lstm_zheng',
                                             mask=x_zheng_mask)
    if options['encoder'] == 'lstm':
        proj_zheng = (proj1 * x_zheng_mask[:, :, None]).sum(axis=0)
        proj_zheng = proj_zheng / x_zheng_mask.sum(axis=0)[:, None]

    emb_ni = tparams['Wemb'][x_ni.flatten()].reshape(
        [n_timesteps, n_samples, options['dim_proj']])

    proj2 = get_layer(options['encoder'])[1](tparams,
                                             emb_ni,
                                             options,
                                             prefix='lstm_ni',
                                             mask=x_ni_mask)

    if options['encoder'] == 'lstm':
        proj_ni = (proj2 * x_ni_mask[:, :, None]).sum(axis=0)
        proj_ni = proj_ni / x_ni_mask.sum(axis=0)[:, None]

    proj = tensor.concatenate((proj_zheng, proj_ni), axis=1)

    if options['use_dropout']:
        proj = dropout_layer(proj, use_noise, trng)

    pred = tensor.nnet.softmax(tensor.dot(proj, tparams['U']) + tparams['b'])

    pred_zheng = tensor.nnet.softmax(
        tensor.dot(proj_zheng, tparams['U_zheng']) + tparams['b'])

    pred_ni = tensor.nnet.softmax(
        tensor.dot(proj_ni, tparams['U_ni']) + tparams['b'])

    f_pred_prob = theano.function([x_zheng, x_zheng_mask, x_ni, x_ni_mask],
                                  pred,
                                  name='f_pred_prob')

    f_pred = theano.function([x_zheng, x_zheng_mask, x_ni, x_ni_mask],
                             pred.argmax(axis=1),
                             name='f_pred')

    f_proj = theano.function([x_zheng, x_zheng_mask, x_ni, x_ni_mask],
                             proj,
                             name='f_proj')

    off = 1e-8
    if pred.dtype == 'float16':
        off = 1e-6

    cost1 = -tensor.log(pred[tensor.arange(n_samples), y] + off).mean()
    cost2 = -tensor.log(pred_zheng[tensor.arange(n_samples), y] + off).mean()
    cost3 = -tensor.log(pred_ni[tensor.arange(n_samples), y] + off).mean()
    cost4 = tensor.sum(tensor.square(proj_zheng - proj_ni), axis=1).mean()
    cost = alpha * (cost1 + cost2 + cost3) + beta * cost4

    return use_noise, x_zheng, x_zheng_mask, x_ni, x_ni_mask, y, f_pred_prob, f_pred, cost1, cost2, cost3, cost4, cost, f_proj
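
# A small NumPy sketch (illustrative, not taken from the model above) of the masked
# mean-pooling applied to proj1/proj2: timestep outputs are summed where the mask is 1
# and divided by the number of valid (non-padding) timesteps of each sample.
import numpy as np

proj = np.random.rand(5, 2, 4)      # (n_timesteps, n_samples, dim_proj)
mask = np.array([[1., 1.],          # (n_timesteps, n_samples); 0 marks padding
                 [1., 1.],
                 [1., 0.],
                 [0., 0.],
                 [0., 0.]])
pooled = (proj * mask[:, :, None]).sum(axis=0) / mask.sum(axis=0)[:, None]
print(pooled.shape)                 # (n_samples, dim_proj)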
Example #59
0
    def logp(self, value):
        w = self.w

        return bound(logsumexp(tt.log(w) + self._comp_logp(value), axis=-1),
                     w >= 0, w <= 1, tt.allclose(w.sum(axis=-1), 1),
                     broadcast_conditions=False)
    def cost(self, net):
        "Return the negative log-likelihood cost."
        return -T.mean(
            T.log(self.output_dropout)[T.arange(net.y.shape[0]), net.y])