Exemple #1
0
 def vtovMBall(self, VsampM):
     """
     Run one full visible -> hidden -> visible sampling step (single MCMC iteration).
     Every row of the input matrix is processed in parallel.

     args:
     VsampM (T.matrix): rows of this matrix are visible unit inputs

     return:
     a list of four symbolic expressions, in this order:
     ahtovMBres: visible unit samples drawn by self.T_rng.normal around aT_means
                 with standard deviation sqrt(exp(self.T_z))
     avtohMBres: hidden unit samples drawn by self.T_rng.binomial with p=aT_Hp
     aT_Hp: hidden unit probabilities (sigmoid of the biased projection)
     aT_means: means used for sampling the visible units
     """
     # v to h part: scale the visible values by exp(-T_z), project them through
     # T_omega, add the bias T_b and squash to obtain the hidden probabilities.
     aVomg = T.dot(T.mul(T.fill(VsampM, T.exp(-self.T_z)), VsampM), self.T_omega)
     aT_Hp = T.nnet.ultra_fast_sigmoid(T.fill(aVomg, self.T_b) + aVomg)
     avtohMBres = self.T_rng.binomial(size=aT_Hp.shape, p=aT_Hp, dtype=theano.config.floatX)

     # h to v part: project the hidden samples back through T_omega, add the
     # bias T_a to get the means, then sample the visible units around them.
     aT_omgH = T.transpose(T.dot(self.T_omega, T.transpose(avtohMBres)))
     aT_means = T.fill(aT_omgH, self.T_a) + aT_omgH
     ahtovMBres = self.T_rng.normal(size=aT_means.shape, avg=aT_means, std=T.fill(aT_means, T.sqrt(T.exp(self.T_z))), dtype=theano.config.floatX)
     return [ahtovMBres, avtohMBres, aT_Hp, aT_means]
Exemple #2
0
    def test_exp_over_1_plus_exp(self):
        """Compile several exp-based expressions and check the ops left in the graph.

        Each expression is compiled with ``local_elemwise_fusion`` excluded and
        the topologically sorted ops of the compiled function are compared
        against the expected list.
        """
        mode = self.get_mode(excluding=['local_elemwise_fusion'])
        vec = T.dvector()

        def build(expr):
            # Compile ``expr`` as a function of the single test vector.
            return theano.function([vec], expr, mode=mode)

        def ops_of(fn):
            # Ops of the compiled graph in topological order.
            return [apply_node.op for apply_node in fn.maker.env.toposort()]

        neg_inplace = theano.tensor.inplace.neg_inplace

        # exp_over_1_plus_exp: exp(x) / (1 + exp(x))
        fn = build(T.exp(vec) / (1 + T.exp(vec)))
        theano.printing.debugprint(fn)
        assert ops_of(fn) == [sigmoid]

        # inv_1_plus_exp: 1 / (1 + exp(-x))
        fn = build(T.fill(vec, 1.0) / (1 + T.exp(-vec)))
        theano.printing.debugprint(fn)
        assert ops_of(fn) == [sigmoid]

        # inv_1_plus_exp with a negated numerator: -1 / (1 + exp(-x))
        fn = build(T.fill(vec, -1.0) / (1 + T.exp(-vec)))
        assert ops_of(fn) == [sigmoid, neg_inplace]

        # double inv_1_plus_exp with neg:
        #   (-1)(exp(x)) / (1+exp(x))(1+exp(-x))
        # = (-1)/(1+exp(-x)) * exp(x)/(1+exp(x))
        # = - (sigm(x) * sigm(x))
        numerator = T.fill(vec, -1.0) * T.exp(vec)
        denominator = (1 + T.exp(vec)) * (1 + T.exp(-vec))
        fn = build(numerator / denominator)
        theano.printing.debugprint(fn)
        assert ops_of(fn) == [sigmoid, T.mul, neg_inplace]
Exemple #3
0
    def _gen_exprs(self, inpt):
        """Return the expressions produced by ``vprnn.exprs`` for the ``gen`` parameters.

        All weights are read from ``self.parameters.gen``; the layer count
        comes from ``self.n_hiddens_gen``.
        """
        pars = self.parameters.gen
        depth = len(self.n_hiddens_gen)

        def per_layer(template, count):
            # Fetch ``count`` numbered attributes (``template % 0`` ...) from the parameters.
            return [getattr(pars, template % idx) for idx in range(count)]

        hidden_to_hiddens = per_layer('hidden_to_hidden_%i', depth - 1)
        hidden_biases = per_layer('hidden_bias_%i', depth)
        initial_hidden_means = per_layer('initial_hidden_means_%i', depth)
        # The raw parameters are squared and offset by 1e-4 before use.
        initial_hidden_vars = [
            raw ** 2 + 1e-4
            for raw in per_layer('initial_hidden_vars_%i', depth)
        ]
        recurrents = per_layer('recurrent_%i', depth)

        # Build a dropout-rate tensor shaped like the input: one part for the
        # first ``n_latent`` features and one for the remaining ones.
        # NOTE(review): both parts are filled with ``self.p_dropout_inpt``.
        latent_rates = T.fill(T.zeros_like(inpt[:, :, :self.n_latent]),
                              self.p_dropout_inpt)
        shortcut_rates = T.fill(T.zeros_like(inpt[:, :, self.n_latent:]),
                                self.p_dropout_inpt)
        p_dropout_inpt = T.concatenate([latent_rates, shortcut_rates], axis=2)

        # The hidden-to-output rate falls back to the last hidden rate.
        final_rate = self.p_dropout_hidden_to_out
        if final_rate is None:
            final_rate = self.p_dropout_hiddens[-1]
        p_dropouts = [p_dropout_inpt] + self.p_dropout_hiddens
        p_dropouts.append(final_rate)

        return vprnn.exprs(inpt,
                           T.zeros_like(inpt),
                           pars.in_to_hidden,
                           hidden_to_hiddens,
                           pars.hidden_to_out,
                           hidden_biases, [1] * len(hidden_biases),
                           initial_hidden_means,
                           initial_hidden_vars,
                           recurrents,
                           pars.out_bias,
                           1,
                           self.gen_transfers,
                           self.assumptions.statify_visible,
                           p_dropouts=p_dropouts)
Exemple #4
0
    def dlogp(inputs, gradients):
        """Build the gradient expressions with respect to ``cov`` and ``delta``.

        ``inputs`` unpacks to ``(cov, delta)`` and ``gradients`` to the single
        upstream gradient. Returns a two-element list: the ``cov`` gradient and
        the ``delta`` gradient, each scaled by the upstream gradient. Both are
        NaN when the Cholesky factor has a non-positive diagonal entry.
        """
        (upstream,) = gradients
        cov, delta = inputs

        upstream.tag.test_value = floatX(1.)
        n_rows, n_cols = delta.shape

        factor = cholesky(cov)
        positive = tt.all(tt.nlinalg.diag(factor) > 0)

        # Substitute an all-ones matrix when the factor is unusable so the
        # solves below still run; the results are masked with NaN afterwards.
        factor = tt.switch(positive, factor, tt.fill(factor, 1))
        factor_t = factor.T

        whitened = solve_lower(factor, delta.T).T

        inner = n_rows * tt.eye(n_cols) - tt.dot(whitened.T, whitened)
        grad_cov = solve_upper(factor_t, solve_upper(factor_t, inner).T)
        grad_delta = solve_upper(factor_t, whitened.T).T

        grad_cov = tt.switch(positive, grad_cov, -np.nan)
        grad_delta = tt.switch(positive, grad_delta, -np.nan)

        return [-0.5 * grad_cov * upstream, -grad_delta * upstream]
Exemple #5
0
        def _step2(ctx_, state_, hs_, Cs_):
            """Run one decoder step starting from the previous output ``state_``.

            The argmax token of ``state_`` is embedded, passed through every
            decoder LSTM layer with an all-ones mask, joined with ``ctx_`` and
            projected through ``self.linear`` followed by a softmax.

            Returns the new distribution plus the stacked last hidden and cell
            states of each layer.
            """
            n_cols = encoderInputs.shape[1]
            token_idxs = tensor.cast(state_.argmax(axis=-1), "int32")

            # All-ones float32 mask with a leading broadcastable axis.
            step_mask = tensor.fill(
                tensor.zeros_like(token_idxs, dtype="float32"), 1)
            step_mask = step_mask.dimshuffle('x', 0)

            layer_in = self.de_lookuptable[token_idxs].reshape(
                (1, n_cols, self.de_hidden_size))

            last_hs, last_Cs = [], []
            for depth, lstm in enumerate(self.decoder_lstm_layers):
                # mind the mask argument
                h_seq, C_seq = lstm.forward(layer_in, step_mask, hs_[depth],
                                            Cs_[depth])
                last_hs.append(h_seq[-1])
                last_Cs.append(C_seq[-1])
                layer_in = h_seq  # the next layer consumes this layer's output

            hs = tensor.as_tensor_variable(last_hs)
            Cs = tensor.as_tensor_variable(last_Cs)

            top = layer_in.reshape((n_cols, self.de_hidden_size))
            features = tensor.concatenate([ctx_, top], axis=1)
            state_below = tensor.nnet.softmax(tensor.dot(features, self.linear))

            return state_below, hs, Cs
Exemple #6
0
def likelihoodFunc(v,size,graphMatrix,value):
    """Return the summed log of ``(v * penalty) ** value``.

    ``v`` is broadcast to a vector of length ``size``; ``penalty`` is
    ``5 - 4 * exp(-graphMatrix . value)``.
    """
    # Vector of the null Bernoulli probabilities.
    base_probs = tt.fill(tt.ones(size), v)
    # Matrix product: per the original note, how many correlated features are "on".
    active = tt.dot(graphMatrix, value)
    scale = 5 - 4 * tt.exp(-active)
    adjusted = base_probs * scale
    return tt.sum(tt.log(tt.pow(adjusted, value)))
Exemple #7
0
        def _step2(ctx_, state_, hs_, Cs_):
            """Run one decoder step starting from the previous output ``state_``.

            The argmax token of ``state_`` is embedded, passed through every
            decoder LSTM layer with an all-ones mask, joined with ``ctx_``,
            projected through ``self.linear`` plus ``self.linear_bias`` and
            soft-maxed. Zero-valued columns shaped like ``hs[:, :, 0].T`` are
            appended to the resulting distribution.

            Returns the extended distribution plus the stacked last hidden and
            cell states of each layer.
            """
            n_rows = ctx_.shape[0]
            token_idxs = T.cast(state_.argmax(axis=-1), "int32")

            # All-ones float32 mask with a leading broadcastable axis.
            step_mask = T.fill(T.zeros_like(token_idxs, dtype="float32"), 1)
            step_mask = step_mask.dimshuffle('x', 0)

            layer_in = self.de_lookuptable[token_idxs].reshape(
                (1, n_rows, self.de_hidden_size))

            last_hs, last_Cs = [], []
            for depth, lstm in enumerate(self.decoder_lstm_layers):
                # mind the mask argument
                h_seq, C_seq = lstm.forward(layer_in, step_mask, hs_[depth],
                                            Cs_[depth])
                last_hs.append(h_seq[-1])
                last_Cs.append(C_seq[-1])
                layer_in = h_seq  # the next layer consumes this layer's output

            hs = T.as_tensor_variable(last_hs)
            Cs = T.as_tensor_variable(last_Cs)

            top = layer_in.reshape((n_rows, self.de_hidden_size))
            features = T.concatenate([ctx_, top], axis=1)

            logits = T.dot(features, self.linear) + self.linear_bias[None, :]
            probs = T.nnet.softmax(logits)

            zero_cols = T.zeros_like(hs[:, :, 0]).T
            state_below = T.concatenate([probs, zero_cols], axis=1)

            return state_below, hs, Cs
        def _step2(ctx_, state_, hs_, Cs_):
            """Run one decoder step starting from the previous output ``state_``.

            Shapes noted by the original author (not checked here):
              ctx_   : b x h
              state_ : b x h
              hs_    : 1 x b x h, first dimension = number of decoder layers
              Cs_    : 1 x b x h, first dimension = number of decoder layers

            Returns the new distribution (with zero-valued columns appended)
            plus the stacked last hidden and cell states of each layer.
            """
            n_rows = ctx_.shape[0]
            token_idxs = tensor.cast(state_.argmax(axis=-1), "int32")

            # All-ones float32 mask with a leading broadcastable axis.
            step_mask = tensor.fill(
                tensor.zeros_like(token_idxs, dtype="float32"), 1)
            step_mask = step_mask.dimshuffle('x', 0)

            layer_in = self.de_lookuptable[token_idxs].reshape(
                (1, n_rows, self.de_hidden_size))

            last_hs, last_Cs = [], []
            for depth, lstm in enumerate(self.decoder_lstm_layers):
                # mind the mask argument
                h_seq, C_seq = lstm.forward(layer_in, step_mask, hs_[depth],
                                            Cs_[depth])
                last_hs.append(h_seq[-1])
                last_Cs.append(C_seq[-1])
                layer_in = h_seq  # the next layer consumes this layer's output

            hs = tensor.as_tensor_variable(last_hs)
            Cs = tensor.as_tensor_variable(last_Cs)

            top = layer_in.reshape((n_rows, self.de_hidden_size))
            features = tensor.concatenate([ctx_, top], axis=1)
            logits = tensor.dot(features,
                                self.linear) + self.linear_bias[None, :]
            probs = tensor.nnet.softmax(logits)

            # The beginning symbol gets probability 0 (original author's note).
            zero_cols = tensor.zeros_like(hs[:, :, 0]).T
            state_below = tensor.concatenate([probs, zero_cols], axis=1)

            return state_below, hs, Cs
    def output_probabilistic(self, m_x, v_x):
        """Propagate an input mean ``m_x`` and variance ``v_x`` through the layer.

        Returns ``(mean, variance)``. For an output layer these are the moments
        of the linear transform; otherwise they are the moments after the
        activation (a ReLU, according to the original comment).
        """
        n_rows = m_x.shape[ 0 ]
        w_mean = self.m_W[ 0, :, : ]
        w_var = self.v_W[ 0, :, : ]

        # Mean and variance of the linear part, biases tiled across the rows.
        m_linear = T.dot(m_x, w_mean) + T.tile(self.m_b[ 0, :, : ], [ n_rows, 1 ])
        v_linear = T.dot(m_x**2, w_var) + T.dot(v_x, w_mean**2) + T.dot(v_x, w_var) + \
            T.tile(self.v_b[ 0, :, : ], [ n_rows, 1 ])

        if self.output_layer:
            return (m_linear, v_linear)

        # We compute the mean and variance after the ReLU activation

        std_linear = T.sqrt(v_linear)
        alpha = m_linear / std_linear
        gamma = Network_layer.gamma(-alpha)
        # Series expression used in place of ``gamma`` when -alpha is not below 30.
        gamma_robust = -alpha - 1.0 / alpha + 2.0 / alpha**3
        gamma_final = T.switch(T.lt(-alpha, T.fill(alpha, 30)), gamma, gamma_robust)

        v_aux = m_linear + std_linear * gamma_final

        m_a = Network_layer.n_cdf(alpha) * v_aux
        v_a = m_a * v_aux * Network_layer.n_cdf(-alpha) + Network_layer.n_cdf(alpha) * v_linear * (1 - gamma_final * (gamma_final + alpha))

        return (m_a, v_a)
Exemple #10
0
    def dlogp(inputs, gradients):
        """Build the gradient expressions with respect to ``cov`` and ``delta``.

        ``inputs`` unpacks to ``(cov, delta)`` and ``gradients`` to the single
        upstream gradient. Returns a two-element list: the ``cov`` gradient and
        the ``delta`` gradient, each scaled by the upstream gradient. Both are
        NaN when the Cholesky factor has a non-positive diagonal entry.
        """
        (upstream,) = gradients
        cov, delta = inputs

        upstream.tag.test_value = floatX(1.)
        n_rows, n_cols = delta.shape

        factor = cholesky(cov)
        positive = tt.all(tt.nlinalg.diag(factor) > 0)

        # Substitute an all-ones matrix when the factor is unusable so the
        # solves below still run; the results are masked with NaN afterwards.
        factor = tt.switch(positive, factor, tt.fill(factor, 1))
        factor_t = factor.T

        whitened = solve_lower(factor, delta.T).T

        inner = n_rows * tt.eye(n_cols) - tt.dot(whitened.T, whitened)
        grad_cov = solve_upper(factor_t, solve_upper(factor_t, inner).T)
        grad_delta = solve_upper(factor_t, whitened.T).T

        grad_cov = tt.switch(positive, grad_cov, -np.nan)
        grad_delta = tt.switch(positive, grad_delta, -np.nan)

        return [-0.5 * grad_cov * upstream, -grad_delta * upstream]
 def _FindB_best(lPLcl, lPprev, dVLcl):
     """Pick the ``beam_size`` largest entries of ``lPLcl`` and their indices.

     When ``dVLcl`` equals zero the selected values are offset by ``lPprev``
     and returned with their indices. Otherwise the result is a vector of
     -10000 whose first entry is ``lPprev``, paired with all-zero indices.
     """
     top_idx = tensor.argsort(-lPLcl)[:beam_size]
     top_vals = lPLcl[top_idx]
     is_zero = tensor.eq(dVLcl, tensor.zeros_like(dVLcl))

     # Fallback: every slot at -10000 except slot 0, which carries lPprev.
     fallback = tensor.fill(top_vals, numpy_floatX(-10000.))
     fallback = tensor.set_subtensor(fallback[0], lPprev)

     lProbBest = ifelse(is_zero, top_vals + lPprev, fallback)
     xWIdxBest = ifelse(is_zero, top_idx, tensor.zeros_like(top_idx))
     return lProbBest, xWIdxBest
Exemple #12
0
def transform_targets(targets):
    """Interleave blank rows with the target rows for use with cost().

    Every row of ``targets`` is preceded by a row filled with ``_BLANK``, and
    a copy of the first resulting row is appended at the end.
    """
    padded = T.shape_padleft(targets)
    blank_rows = T.fill(padded, _BLANK)
    # Stack blanks and targets, then swap the first two axes so that each
    # blank row sits immediately before its target row after the reshape.
    paired = T.concatenate([blank_rows, padded]).dimshuffle(1, 0, 2)
    interleaved = paired.reshape((2*targets.shape[0], targets.shape[1]))
    trailing = T.shape_padleft(interleaved[0])
    return T.concatenate([interleaved, trailing])
Exemple #13
0
 def energyFnMB(self, VM, HM):
     """
     evaluates the energy function of the RBM for matching rows of v and h

     args:
     VM (T.matrix): rows of visible layer values
     HM (T.matrix): rows of hidden layer values

     return:
     a Theano vector whose elements are E(v_row, h_row)
     """
     acc = theano.config.floatX
     inv_scale = T.exp(-self.T_z)

     # hidden bias contribution: h . b
     bias_term = T.dot(HM, self.T_b)

     # interaction contribution: sum over hidden units of ((v * exp(-z)) . omega) * h
     scaled_v = T.mul(T.fill(VM, inv_scale), VM)
     pairwise = T.mul(T.dot(scaled_v, self.T_omega), HM)
     interaction_term = T.transpose(T.sum(pairwise, axis=1, acc_dtype=acc))

     # quadratic contribution: sum over visible units of 0.5 * exp(-z) * (v - a)^2
     centred = VM - T.fill(VM, self.T_a)
     squared = T.mul(centred, centred)
     weighted = T.mul(T.fill(squared, np.float32(0.5) * inv_scale), squared)
     quadratic_term = T.transpose(T.sum(weighted, axis=1, acc_dtype=acc))

     return -quadratic_term + interaction_term + bias_term
Exemple #14
0
def my_crf_accuracy(energies):
    """Decode the highest-scoring label sequence from CRF energies.

    A forward max/argmax scan is followed by a backward scan over the stored
    pointers. Despite the name, the function returns the predicted labels
    (one row per batch entry, time along the second axis), not an accuracy.

    :param energies: 4D tensor, documented by the original comments as
        (n_batch, n_time_steps, num_labels, num_labels) with the pad label last.
    :return: predicted label indices
    """
    assert energies.ndim == 4

    def inner_function(energies_one_step, prior_pi, prior_pointer):
        """
        One forward step.

        :param energies_one_step: [batch_size, t, t]
        :param prior_pi: [batch_size, t]
        :param prior_pointer: [batch_size, t] (unused, carried by scan)
        :return: best scores and the indices that produced them
        """
        scores = prior_pi.dimshuffle(0, 1, 'x') + energies_one_step
        return [T.max(scores, axis=1), T.argmax(scores, axis=1)]

    def back_pointer(pointer, pointer_tp1):
        """
        One backward step.

        :param pointer: [batch, t]
        :param pointer_tp1: [batch,]
        :return: for each batch row, the pointer entry selected by pointer_tp1
        """
        rows = T.arange(pointer.shape[0])
        return pointer[rows, pointer_tp1]

    # scan iterates over the first axis, so move time in front:
    # (n_batch, n_time_steps, L, L) -> (n_time_steps, n_batch, L, L)
    time_major = energies.dimshuffle(1, 0, 2, 3)

    # pi at time 0 is the last row at time 0, without the last column
    # (the pad symbol).
    pi_time0 = time_major[0, :, -1, :-1]

    # Drop the pad label's row and column; now [n_time_steps, n_batch, t, t]
    # with t = num_labels - 1.
    time_major = time_major[:, :, :-1, :-1]

    start_pointers = T.cast(T.fill(pi_time0, -1), 'int64')

    (pis, pointers), _ = theano.scan(fn=inner_function,
                                     outputs_info=[pi_time0, start_pointers],
                                     sequences=[time_major[1:]])
    pointer_n = T.argmax(pis[-1], axis=1)

    back_pointers, _ = theano.scan(fn=back_pointer,
                                   outputs_info=pointer_n,
                                   sequences=[pointers],
                                   go_backwards=True)

    # The backward scan yields labels last-to-first; shape [batch_size, length].
    reversed_prediction = T.concatenate(
        [pointer_n.dimshuffle(0, 'x'),
         back_pointers.dimshuffle(1, 0)], axis=1)
    flip = T.arange(reversed_prediction.shape[1] - 1, -1, -1)
    return reversed_prediction[:, flip]
Exemple #15
0
    def __init__(self, mean, var, rng=None):
        """Store ``mean`` and ``var`` and build the concatenated ``stt`` tensor.

        ``var`` is expanded to the shape of ``mean`` with ``T.fill``; ``stt``
        joins mean and expanded variance along the last axis; ``maximum`` is
        the mean. ``rng`` is forwarded to the parent constructor.
        """
        # Filling a tensor shaped like ``mean`` allows var of shape (1, 1, n).
        expanded_var = T.fill(mean, var)

        self.mean = mean
        self.var = expanded_var
        self.stt = T.concatenate((mean, expanded_var), -1)
        self.maximum = self.mean

        super(DiagGauss, self).__init__(rng)
Exemple #16
0
def chain_crf_loss(energies, targets, masks):
    """
    Compute the negative log likelihood of a chain CRF.

    :param energies: Theano 4D tensor
        energies of each step. the shape is [batch_size, n_time_steps, num_labels, num_labels],
        where the pad label index is at last.
    :param targets: Theano 2D tensor
        targets in the shape [batch_size, n_time_steps]
    :param masks: Theano 2D tensor
        masks in the shape [batch_size, n_time_steps]
    :return: Theano 1D tensor
        log partition minus the accumulated target energy, per batch entry.
    """

    assert energies.ndim == 4
    assert targets.ndim == 2
    assert masks.ndim == 2

    def inner_function(energies_one_step, targets_one_step, mask_one_step, prior_partition, prev_label, tg_energy):
        """
        One scan step.

        :param energies_one_step: [batch_size, t, t]
        :param targets_one_step: [batch_size]
        :param mask_one_step: [batch_size]
        :param prior_partition: [batch_size, t]
        :param prev_label: [batch_size]
        :param tg_energy: [batch_size]
        :return: updated partition, current targets, updated target energy
        """
        batch_rows = T.arange(energies_one_step.shape[0])

        # The mask gates only the partition update; the target energy below
        # is accumulated at every step.
        updated = theano_logsumexp(energies_one_step + prior_partition.dimshuffle(0, 1, 'x'), axis=1)
        partition_t = T.switch(mask_one_step.dimshuffle(0, 'x'), updated, prior_partition)

        step_energy = energies_one_step[batch_rows, prev_label, targets_one_step]
        return [partition_t, targets_one_step, tg_energy + step_energy]

    # scan iterates over the first axis, so move time in front:
    # (n_batch, n_time_steps, L, L) -> (n_time_steps, n_batch, L, L)
    energies_t = energies.dimshuffle(1, 0, 2, 3)
    targets_t = targets.dimshuffle(1, 0)
    masks_t = masks.dimshuffle(1, 0)

    # The initial "previous label" is -1, i.e. the last (pad) row, so the
    # initial partition is energies_t[0, :, -1, :].
    start_label = T.cast(T.fill(energies[:, 0, 0, 0], -1), 'int32')
    first_energy = energies_t[0]
    first_target = targets_t[0]
    first_rows = T.arange(first_energy.shape[0])
    initials = [energies_t[0, :, -1, :], first_target,
                first_energy[first_rows, start_label, first_target]]

    (partitions, _, target_energies), _ = theano.scan(
        fn=inner_function, outputs_info=initials,
        sequences=[energies_t[1:], targets_t[1:], masks_t[1:]])

    final_partition = partitions[-1]
    final_target_energy = target_energies[-1]
    return theano_logsumexp(final_partition, axis=1) - final_target_energy
Exemple #17
0
    def __init__(self, mean, var, rng=None):
        """Store ``mean`` and ``var`` and build the concatenated ``stt`` tensor.

        ``var`` is expanded to the shape of ``mean`` with ``T.fill``; ``stt``
        joins mean and expanded variance along the last axis; ``maximum`` is
        the mean. ``rng`` is forwarded to the parent constructor.
        """
        # Filling a tensor shaped like ``mean`` allows var of shape (1, 1, n).
        expanded_var = T.fill(mean, var)

        self.mean = mean
        self.var = expanded_var
        self.stt = T.concatenate((mean, expanded_var), -1)
        self.maximum = self.mean

        super(DiagGauss, self).__init__(rng)
Exemple #18
0
def transform_targets(targets):
    """Interleave blank rows with the target rows for use with cost().

    Every row of ``targets`` is preceded by a row filled with ``_BLANK``, and
    a copy of the first resulting row is appended at the end.
    """
    padded = T.shape_padleft(targets)
    blank_rows = T.fill(padded, _BLANK)
    # Stack blanks and targets, then swap the first two axes so that each
    # blank row sits immediately before its target row after the reshape.
    paired = T.concatenate([blank_rows, padded]).dimshuffle(1, 0, 2)
    interleaved = paired.reshape((2 * targets.shape[0], targets.shape[1]))
    trailing = T.shape_padleft(interleaved[0])
    return T.concatenate([interleaved, trailing])
 def _FindB_best(lPLcl, lPprev, dVLcl):
     """Pick the ``beam_size`` largest entries of ``lPLcl`` and their indices.

     When ``dVLcl`` equals zero the selected values are offset by ``lPprev``
     and returned with their indices. Otherwise the result is a vector of
     -10000 whose first entry is ``lPprev``, paired with all-zero indices.
     """
     top_idx = tensor.argsort(-lPLcl)[:beam_size]
     top_vals = lPLcl[top_idx]
     is_zero = tensor.eq(dVLcl, tensor.zeros_like(dVLcl))

     # Fallback: every slot at -10000 except slot 0, which carries lPprev.
     fallback = tensor.fill(top_vals, numpy_floatX(-10000.))
     fallback = tensor.set_subtensor(fallback[0], lPprev)

     lProbBest = ifelse(is_zero, top_vals + lPprev, fallback)
     xWIdxBest = ifelse(is_zero, top_idx, tensor.zeros_like(top_idx))
     return lProbBest, xWIdxBest
Exemple #20
0
def MRR_loss(y_true, y_pred):
    '''
    Count how many predictions are smaller than the prediction at the
    position of the largest ``y_true`` entry.

    Notes kept from the original author:
    training data has to be Xloop, Xtap, target = utils.MakeTrainingDataRank(Loop, Tap),
    and the batch size has to be 40.
    '''
    # Score predicted for the entry that y_true ranks highest.
    reference_score = T.mean(y_pred[T.argmax(y_true)])
    comp = T.fill(T.zeros_like(y_true), reference_score)
    Rank = T.sum(T.gt(comp, y_pred))
    # The zero-weighted means add nothing numerically; presumably they keep
    # both inputs referenced in the returned expression -- TODO confirm.
    return Rank + T.mean(y_true) * 0 + T.mean(y_pred) * 0
    def output_deterministic(self, output_previous):
        """Apply the layer to ``output_previous`` and return its activation.

        A constant 1 is appended to the input, the result is divided by
        ``sqrt(self.n_inputs)`` and multiplied by ``self.w``. When
        ``self.non_linear`` is set, negative entries are replaced by 0.
        """
        # We add an additional input with value 1
        bias_unit = T.alloc(1, 1)
        output_previous_with_bias = \
            T.concatenate([ output_previous, bias_unit ], 0) / \
            T.sqrt(self.n_inputs)

        # Linear operation
        a = T.dot(self.w, output_previous_with_bias)

        if not self.non_linear:
            return a

        # ReLU activation: keep ``a`` where it is not below zero.
        return T.switch(T.lt(a, T.fill(a, 0)), T.fill(a, 0), a)
Exemple #22
0
 def vtohMB(self, VsampM):
     """
     computes hidden unit outputs given visible unit outputs ("half" a MCMC iteration)
     computes in parallel given input rows of visible units

     args:
     VsampM (T.matrix): rows of visible unit outputs

     returns:
     a T.matrix, rows of hidden unit outputs (samples drawn by self.T_rng.binomial)

     """
     # Scale the visible values by exp(-T_z) and project them through T_omega.
     Vomg = T.dot(T.mul(T.fill(VsampM, T.exp(-self.T_z)), VsampM), self.T_omega)
     # Add the bias T_b and squash to obtain the per-unit probabilities.
     T_Hp = T.nnet.ultra_fast_sigmoid(T.fill(Vomg, self.T_b) + Vomg)
     vtohMBres = self.T_rng.binomial(size=T_Hp.shape, p=T_Hp, dtype=theano.config.floatX)
     return vtohMBres
Exemple #23
0
    def _gen_exprs(self, inpt):
        """Return the expressions produced by ``vprnn.exprs`` for the ``gen`` parameters.

        All weights are read from ``self.parameters.gen``; the layer count
        comes from ``self.n_hiddens_gen``.
        """
        P = self.parameters.gen

        n_layers = len(self.n_hiddens_gen)
        hidden_to_hiddens = [getattr(P, 'hidden_to_hidden_%i' % i)
                             for i in range(n_layers - 1)]
        hidden_biases = [getattr(P, 'hidden_bias_%i' % i)
                         for i in range(n_layers)]
        initial_hidden_means = [getattr(P, 'initial_hidden_means_%i' % i)
                                for i in range(n_layers)]
        initial_hidden_vars = [getattr(P, 'initial_hidden_vars_%i' % i)
                               for i in range(n_layers)]
        recurrents = [getattr(P, 'recurrent_%i' % i)
                      for i in range(n_layers)]

        # Build a dropout-rate tensor shaped like the input: one part for the
        # first ``n_latent`` features and one for the remaining ones.
        # NOTE(review): both parts are filled with ``self.p_dropout_inpt``.
        p_dropout_inpt = T.zeros_like(inpt[:, :, :self.n_latent])
        p_dropout_inpt = T.fill(p_dropout_inpt, self.p_dropout_inpt)

        p_dropout_shortcut = T.zeros_like(inpt[:, :, self.n_latent:])
        p_dropout_shortcut = T.fill(p_dropout_shortcut, self.p_dropout_inpt)

        p_dropout_inpt = T.concatenate([p_dropout_inpt, p_dropout_shortcut],
                                       axis=2)

        # The hidden-to-output rate falls back to the last hidden rate.
        p_dropouts = [p_dropout_inpt] + self.p_dropout_hiddens
        if self.p_dropout_hidden_to_out is None:
            p_dropouts.append(self.p_dropout_hiddens[-1])
        else:
            p_dropouts.append(self.p_dropout_hidden_to_out)

        exprs = vprnn.exprs(
            inpt, T.zeros_like(inpt), P.in_to_hidden, hidden_to_hiddens, P.hidden_to_out,
            hidden_biases, [1 for _ in hidden_biases],
            initial_hidden_means, initial_hidden_vars,
            recurrents,
            P.out_bias, 1, self.gen_transfers, self.assumptions.statify_visible,
            p_dropouts=p_dropouts)

        return exprs
Exemple #24
0
 def htovMB(self, HsampM):
     """
     computes visible unit outputs given hidden unit inputs ("half" a MCMC iteration)
     computes in parallel given input rows of hidden units

     args:
     HsampM (T.matrix): rows of hidden unit inputs

     returns:
     a T.matrix, rows of visible unit outputs (samples drawn by self.T_rng.normal)

     """
     # Project the hidden values back through T_omega (rows stay samples).
     T_omgH = T.transpose(T.dot(self.T_omega, T.transpose(HsampM)))
     # Add the bias T_a to obtain the means of the visible units.
     T_means = T.fill(T_omgH, self.T_a) + T_omgH
     # Sample around the means with standard deviation sqrt(exp(T_z)).
     htovMBres = self.T_rng.normal(size=T_means.shape, avg=T_means, std=T.fill(T_means, T.sqrt(T.exp(self.T_z))), dtype=theano.config.floatX)
     return htovMBres
Exemple #25
0
 def _log_likelihood(self, x_vars, means):
     """
     Symbolic log-likelihood of ``x_vars`` under a diagonal gaussian with the
     given means and the fixed standard deviation ``self.policy.sigma``.

     :param x_vars: values to evaluate
     :param means: means of the gaussian, same layout as ``x_vars``
     :return: log-likelihood summed over the last axis
     """
     sigma = T.fill(T.zeros_like(means), self.policy.sigma)
     standardized = (x_vars - means) / sigma

     log_sigma_term = T.sum(T.log(sigma), axis=-1)
     quadratic_term = 0.5 * T.sum(T.square(standardized), axis=-1)
     constant_term = 0.5 * means.shape[-1] * np.log(2 * np.pi)

     return -log_sigma_term - quadratic_term - constant_term
    def CalculateCosineS(self, options, ctx=None, proj_h=None, h_mask=None):
        """Return masked, normalised weights from cosine similarity of ``ctx`` and ``proj_h``.

        The cosine over axis 2 is scaled by ``options['r']``, exponentiated,
        masked with ``h_mask`` and normalised over axis 0.
        """
        sharpness = options['r']
        # 1 where the mask is 0; added to norms and denominators so masked
        # positions do not divide by zero.
        pad = tensor.ones_like(h_mask) - h_mask

        ctx_len = ctx.norm(2, 2)
        proj_len = (proj_h + pad[:, :, None]).norm(2, 2) * h_mask
        dots = (ctx * proj_h).sum(2)
        cosine = dots / (ctx_len * proj_len + pad)

        scale = tensor.fill(tensor.zeros_like(cosine), sharpness)
        weights = tensor.exp(cosine * scale) * h_mask

        totals = weights.sum(0)[None, :] + tensor.min(pad, axis=0)[None, :]
        return weights / totals
Exemple #27
0
 def _r_loss(self, preds, y):
     """
     Fraction of predictions that differ from their target.

     :param preds: (n_batch, T) this variable stores the predictions for one path of size T
     :param y:   (n_batch, ) this variable stores the targets
     :return: mean over both axes of the element-wise ``preds != y_rep`` indicator
     """
     # y_rep: (n_batch, T) -- row b holds y[b] repeated self.policy.n_steps times.
     # ``range`` (not ``xrange``) keeps this loop working on Python 2 and 3.
     y_rep = T.stack([
         T.fill(T.zeros((self.policy.n_steps)), y[b])
         for b in range(self.policy.n_batch)
     ],
                     axis=0)
     return T.neq(preds, y_rep).mean(axis=[0, 1])
 def CalculateCosine(self, options, ctx=None, proj_h=None, ctx_mask=None):
     """Return masked, normalised cosine weights and their argmax over axis 0.

     The cosine between ``ctx`` and the broadcast ``proj_h`` is scaled by
     ``options['r']``, exponentiated, masked with ``ctx_mask`` and normalised
     over axis 0.
     """
     sharpness = options['r']
     # 1 where the mask is 0; added to norms and denominators so masked
     # positions do not divide by zero.
     pad = tensor.ones_like(ctx_mask) - ctx_mask

     ctx_len = (ctx + pad[:, :, None]).norm(2, 2) * ctx_mask
     proj_len = proj_h.norm(2, 1)
     dots = (ctx * proj_h[None, :, :]).sum(2)
     cosine = dots / (ctx_len * proj_len[None, :] + pad)

     scale = tensor.fill(tensor.zeros_like(cosine), sharpness)
     weights = tensor.exp(cosine * scale) * ctx_mask

     totals = weights.sum(0)[None, :] + tensor.min(pad, axis=0)[None, :]
     p = weights / totals
     return p, p.argmax(0)
Exemple #29
0
    def test_1msigmoid(self):
        """Check that ``1 - sigmoid``-style expressions compile to neg + in-place sigmoid.

        The test is a no-op when ``register_local_1msigmoid`` is falsy.
        """
        if not register_local_1msigmoid:
            return

        mode = self.get_mode()
        mat = T.fmatrix()
        expected_ops = [tensor.neg, sigmoid_inplace]

        candidates = (
            # exp_over_1_plus_exp
            1 - T.exp(mat) / (1 + T.exp(mat)),
            # inv_1_plus_exp
            1 - T.fill(mat, 1.0) / (1 + T.exp(-mat)),
        )
        for expr in candidates:
            fn = theano.function([mat], expr, mode=mode)
            found_ops = [node.op for node in fn.maker.fgraph.toposort()]
            assert found_ops == expected_ops
Exemple #30
0
    def get_pi_from_v(self, Q):
        """Turn the value tensor ``Q`` into a policy according to ``self.v_to_pi``.

        Supported settings:
          'greedy'   -- one-hot (int32) encoding of the argmax over the last axis
          'e-greedy' -- ``epsilon / 5`` everywhere plus ``(1 - epsilon)`` times
                        the greedy one-hot policy
          'softmax'  -- ``T.nnet.softmax(Q)``

        Raises:
            ValueError: if ``self.v_to_pi`` is none of the settings above.
                ValueError is an ``Exception`` subclass, so callers catching
                the previously raised bare ``Exception`` keep working.
        """
        # Number of actions assumed by the one-hot encoding and the epsilon share.
        n_actions = 5

        if self.v_to_pi == 'greedy' or self.v_to_pi == 'e-greedy':
            greedy_actions = T.argmax(Q, axis=-1)
            greedy_pi = T.extra_ops.to_one_hot(greedy_actions,
                                               nb_class=n_actions,
                                               dtype='int32')

            if self.v_to_pi == 'greedy':
                return greedy_pi
            else:
                return T.fill(
                    Q, self.epsilon / n_actions) + (1 - self.epsilon) * greedy_pi
        elif self.v_to_pi == 'softmax':
            return T.nnet.softmax(Q)
        else:
            raise ValueError(
                "unknown v_to_pi setting %r; expected 'greedy', 'e-greedy' "
                "or 'softmax'" % (self.v_to_pi,))
Exemple #31
0
    def test_1msigmoid(self):
        """Check that ``1 - sigmoid``-style expressions compile to neg + in-place sigmoid.

        The test is a no-op when ``register_local_1msigmoid`` is falsy.
        """
        if not register_local_1msigmoid:
            return

        mode = self.get_mode()
        mat = T.fmatrix()
        expected_ops = [tensor.neg, sigmoid_inplace]

        candidates = (
            # exp_over_1_plus_exp
            1 - T.exp(mat) / (1 + T.exp(mat)),
            # inv_1_plus_exp
            1 - T.fill(mat, 1.0) / (1 + T.exp(-mat)),
        )
        for expr in candidates:
            fn = theano.function([mat], expr, mode=mode)
            found_ops = [node.op for node in fn.maker.fgraph.toposort()]
            assert found_ops == expected_ops
 def CalculateCosine_webS(self,
                          options,
                          ctx=None,
                          proj_h=None,
                          mask_x=None):
     """Return normalised cosine weights between ``ctx`` and ``proj_h``.

     The cosine over axis 2 is scaled by ``options['r']`` and exponentiated.
     It is then masked by the maximum over axis 0 of ``mask_x`` (reshaped to
     ``[mask_x.shape[0], ctx.shape[0], ctx.shape[1]]``) and normalised over
     axis 0.
     """
     eps = 0.0001  # keeps both divisions away from zero
     sharpness = options['r']

     dots = (ctx * proj_h).sum(2)
     cosine = dots / (ctx.norm(2, 2) * proj_h.norm(2, 2) + eps)

     scale = tensor.fill(tensor.zeros_like(cosine), sharpness)
     weights = tensor.exp(cosine * scale)

     keep = mask_x.reshape(
         [mask_x.shape[0], ctx.shape[0], ctx.shape[1]]).max(0)
     masked = weights * keep

     return masked / (masked.sum(0)[None, :] + eps)
        def _step2(diag_, state_, hs_, Cs_):
            """Run one attention-based decoder step from the previous output ``state_``.

            The argmax token of ``state_`` is embedded and passed through every
            decoder LSTM layer with an all-ones mask. The non-zero entries of
            ``diag_`` select encoder positions, which are scored against the
            top LSTM output, soft-maxed and weighted by the non-zero values to
            build a context vector. Context and LSTM output are joined,
            projected, soft-maxed, and zero-valued columns are appended.

            Returns the extended distribution plus the stacked last hidden and
            cell states of each layer.
            """
            n_cols = encoderInputs.shape[1]
            token_idxs = tensor.cast(state_.argmax(axis=-1), "int32")

            # All-ones float32 mask with a leading broadcastable axis.
            step_mask = tensor.fill(
                tensor.zeros_like(token_idxs, dtype="float32"), 1)
            step_mask = step_mask.dimshuffle('x', 0)

            layer_in = self.de_lookuptable[token_idxs].reshape(
                (1, n_cols, self.de_hidden_size))

            last_hs, last_Cs = [], []
            for depth, lstm in enumerate(self.decoder_lstm_layers):
                # mind the mask argument
                h_seq, C_seq = lstm.forward(layer_in, step_mask, hs_[depth],
                                            Cs_[depth])
                last_hs.append(h_seq[-1])
                last_Cs.append(C_seq[-1])
                layer_in = h_seq  # the next layer consumes this layer's output

            hs = tensor.as_tensor_variable(last_hs)
            Cs = tensor.as_tensor_variable(last_Cs)
            top = layer_in.reshape((n_cols, self.de_hidden_size))

            # Positions and values of the non-zero entries of diag_.
            attn_index = tensor.nonzero(diag_, True)
            attn_value = tensor.nonzero_values(diag_)
            selected = attn_index[0]

            en_context = Encoder_shuffle[:, selected, :]
            attn_context = Encoder_shuffle_re[:, selected, :]

            attn_weight = tensor.nnet.softmax(
                tensor.batched_dot(attn_context, top))
            # Scale by the selected values; the weights are not renormalised.
            attn_weight *= (attn_value.dimshuffle('x', 0))

            # ctx_ : (b, h) according to the original author's note.
            ctx_ = tensor.sum(en_context * attn_weight[:, :, None], axis=1)

            features = tensor.concatenate([ctx_, top], axis=1)
            logits = tensor.dot(features,
                                self.linear) + self.linear_bias[None, :]
            probs = tensor.nnet.softmax(logits)

            # The beginning symbol gets probability 0 (original author's note).
            zero_cols = tensor.zeros_like(hs[:, :, 0]).T
            state_below = tensor.concatenate([probs, zero_cols], axis=1)

            return state_below, hs, Cs
Exemple #34
0
def crf_loss(energies, targets, masks):
    """Build the symbolic chain-CRF loss: log-partition minus gold-path energy.

    :param energies: Theano 4D tensor, expected as
        (n_batch, n_time_steps, num_labels, num_labels).
    :param targets: Theano 2D tensor of gold labels, (n_batch, n_time_steps).
    :param masks: Theano 2D tensor, (n_batch, n_time_steps). Where the mask of
        a step is zero the partition of the previous step is carried over.
    :return: Theano expression ``logsumexp(final partition) - gold energy``.
    """
    assert energies.ndim == 4
    assert targets.ndim == 2
    assert masks.ndim == 2

    def _step(step_energies, step_targets, step_mask, partition_prev,
              label_prev, gold_energy_prev):
        # Candidate partition: marginalise the previous label (axis 1).
        candidate = theano_logsumexp(
            step_energies + partition_prev.dimshuffle(0, 1, 'x'), axis=1)
        # Masked positions keep the previous partition untouched.
        partition_now = T.switch(step_mask.dimshuffle(0, 'x'), candidate,
                                 partition_prev)

        # Energy of the gold transition (previous gold label -> current one).
        batch_rows = T.arange(step_energies.shape[0])
        gold_energy_now = gold_energy_prev + step_energies[batch_rows,
                                                           label_prev,
                                                           step_targets]
        return [partition_now, step_targets, gold_energy_now]

    # scan iterates over the leading axis, so move time in front of batch:
    # (n_batch, n_time_steps, ...) -> (n_time_steps, n_batch, ...)
    time_major_energies = energies.dimshuffle(1, 0, 2, 3)
    time_major_targets = targets.dimshuffle(1, 0)
    time_major_masks = masks.dimshuffle(1, 0)

    # Time step 0 seeds the recursion; label index -1 (the last row) is used
    # as the "previous label" of the first step.
    start_label = T.cast(T.fill(energies[:, 0, 0, 0], -1), 'int32')
    first_energies = time_major_energies[0]
    first_targets = time_major_targets[0]
    seeds = [
        time_major_energies[0, :, -1, :],
        first_targets,
        first_energies[T.arange(first_energies.shape[0]), start_label,
                       first_targets],
    ]

    scan_outputs, _ = theano.scan(fn=_step,
                                  outputs_info=seeds,
                                  sequences=[
                                      time_major_energies[1:],
                                      time_major_targets[1:],
                                      time_major_masks[1:],
                                  ])
    partitions, _, target_energies = scan_outputs

    return theano_logsumexp(partitions[-1], axis=1) - target_energies[-1]
    def __init__(self, input, input_sm, vocab_size, emb_dim, local_context_size, global_context_size):
        """Build the embedding lookup for local and global context words.

        ``input`` is cast to int32 and used as a matrix of word indices. The
        first ``local_context_size`` columns of every row are looked up in the
        embedding table and concatenated. The remaining columns are reduced,
        row by row, to the mean embedding of the first ``k`` words, where
        ``k`` is the number of entries different from the EOS index.

        Args:
            input: symbolic matrix of word indices, one example per row.
            input_sm: not used by this constructor.
            vocab_size: number of rows of the embedding table.
            emb_dim: width of one embedding vector.
            local_context_size: number of leading columns treated as local
                context.
            global_context_size: not used by this constructor.

        Sets ``self.W_emb`` (shared embedding matrix), ``self.output`` (local
        and global parts concatenated along axis 1) and ``self.params``.
        Reads the module-level names ``rng``, ``init_range``,
        ``pretrain_file``, ``load_pretrain_emb`` and ``io_vocab``.
        """
        # Module-level configuration; only read here.
        global rng
        global init_range

        # initialize W_emb, either from a pretrained file or uniformly at random
        if pretrain_file:
            linear_W_emb = load_pretrain_emb(pretrain_file)
            # Parenthesised single argument prints identically on Python 2 and 3.
            print("* Using pretrained linear_W_emb ...")
            assert(len(linear_W_emb) == vocab_size)
        else:
            linear_W_emb = np.asarray(rng.uniform(
                low=-init_range, high=init_range, size=(vocab_size, emb_dim)), dtype=theano.config.floatX)

        # shared variables
        self.W_emb = theano.shared(value=linear_W_emb, name='W_emb')
        # indices must be integers to be usable as subtensor indices
        input = T.cast(input, 'int32')

        # Each row holds the concatenated embeddings of the local context words.
        # output_local dimensions: batch_size * (local_context_size * emb_dim)
        output_local = self.W_emb[input[:, :local_context_size].flatten()].reshape(
            (input.shape[0], local_context_size * emb_dim))

        # Constant vector of EOS indices, as long as the global part of one row.
        eos_vector = T.fill(T.zeros_like(input[0,local_context_size:]), io_vocab.VocabConstants.EOS_INDEX)

        def weighted_sentence(sentence, W, eos_vector):
            # Number of non-EOS entries; the mean is taken over that many
            # leading words of the sentence.
            sent_len = T.sum(T.neq(sentence, eos_vector))
            return T.mean(W[sentence[:sent_len]], axis=0)

        # One mean embedding per row of the global-context columns.
        output_global, updates = theano.scan(fn=weighted_sentence,
                                  outputs_info=None,
                                  sequences=input[:, local_context_size:],
                                  non_sequences=[self.W_emb, eos_vector])

        # concatenate local output and global output to form the output matrix
        self.output = T.concatenate([output_local, output_global], axis=1)

        # params is the word embedding matrix
        self.params = [self.W_emb]
Exemple #36
0
    def test_1msigmoid(self):
        """Check that two spellings of ``1 - sigmoid(x)`` compile to ``[neg, sigmoid_inplace]``.

        Skipped (returns early) when ``register_local_1msigmoid`` is falsy.
        """
        if not register_local_1msigmoid:
            return

        # FAST_COMPILE is replaced by FAST_RUN for this test.
        mode = theano.config.mode
        if mode == 'FAST_COMPILE':
            mode = 'FAST_RUN'

        x = T.fmatrix()

        def compile_and_check(expr):
            # Compile, dump the graph, then compare the op sequence.
            fn = theano.function([x], expr, mode=mode)
            theano.printing.debugprint(fn)
            ops = [node.op for node in fn.maker.env.toposort()]
            assert ops == [tensor.neg, sigmoid_inplace]

        # exp_over_1_plus_exp form: 1 - exp(x) / (1 + exp(x))
        compile_and_check(1 - T.exp(x)/(1+T.exp(x)))

        # inv_1_plus_exp form: 1 - 1 / (1 + exp(-x))
        compile_and_check(1 - T.fill(x,1.0) / (1+T.exp(-x)))
Exemple #37
0
def crf_loss(energies, targets, masks):
    """Build the symbolic chain-CRF loss: log-partition minus gold-path energy.

    ``energies`` must be 4D, ``targets`` and ``masks`` 2D. Axis 1 of all three
    is the one iterated by ``theano.scan``; axis 0 is indexed jointly with the
    per-step targets. Where the mask of a step is zero the partition of the
    previous step is carried over unchanged.

    :return: Theano expression ``logsumexp(final partition) - gold energy``.
    """
    assert energies.ndim == 4
    assert targets.ndim == 2
    assert masks.ndim == 2

    def _advance(energies_t, targets_t, mask_t, partition_prev, label_prev,
                 gold_prev):
        # Marginalise the previous label (axis 1) to get the new partition.
        summed = theano_logsumexp(
            energies_t + partition_prev.dimshuffle(0, 1, 'x'), axis=1)
        partition_t = T.switch(mask_t.dimshuffle(0, 'x'), summed,
                               partition_prev)

        # Accumulate the energy of the gold transition for this step.
        rows = T.arange(energies_t.shape[0])
        gold_t = gold_prev + energies_t[rows, label_prev, targets_t]
        return [partition_t, targets_t, gold_t]

    # Move the scanned axis to the front.
    energies_by_time = energies.dimshuffle(1, 0, 2, 3)
    targets_by_time = targets.dimshuffle(1, 0)
    masks_by_time = masks.dimshuffle(1, 0)

    # Step 0 seeds the recursion, with label index -1 (the last row) acting
    # as the "previous label".
    initial_label = T.cast(T.fill(energies[:, 0, 0, 0], -1), 'int32')
    energies_0 = energies_by_time[0]
    targets_0 = targets_by_time[0]
    seeds = [
        energies_by_time[0, :, -1, :],
        targets_0,
        energies_0[T.arange(energies_0.shape[0]), initial_label, targets_0],
    ]

    scan_outputs, _ = theano.scan(fn=_advance,
                                  outputs_info=seeds,
                                  sequences=[
                                      energies_by_time[1:],
                                      targets_by_time[1:],
                                      masks_by_time[1:],
                                  ])
    partitions, _, target_energies = scan_outputs

    return theano_logsumexp(partitions[-1], axis=1) - target_energies[-1]
Exemple #38
0
    def test_1msigmoid(self):
        """Check that two spellings of ``1 - sigmoid(x)`` compile to ``[neg, sigmoid_inplace]``.

        Skipped (returns early) when ``register_local_1msigmoid`` is falsy.
        Each compiled function must also pass ``check_stack_trace`` for the
        same two ops.
        """
        if not register_local_1msigmoid:
            return

        mode = self.get_mode()
        x = tt.fmatrix()

        def verify(expr):
            fn = theano.function([x], expr, mode=mode)
            assert check_stack_trace(fn, ops_to_check=[tt.neg, sigmoid_inplace])
            ops = [node.op for node in fn.maker.fgraph.toposort()]
            assert ops == [tt.neg, sigmoid_inplace]

        # exp_over_1_plus_exp form
        verify(1 - tt.exp(x) / (1 + tt.exp(x)))

        # inv_1_plus_exp form
        verify(1 - tt.fill(x, 1.0) / (1 + tt.exp(-x)))
    def output_probabilistic(self, m_w_previous, v_w_previous):
        """Propagate input means and variances through this layer.

        :param m_w_previous: symbolic vector of input means.
        :param v_w_previous: symbolic vector of input variances.
        :return: tuple ``(mean, variance)`` of the layer output. When
            ``self.non_linear`` is falsy these are the moments after the
            linear operation, otherwise the moments after the activation.
        """
        # Append a deterministic bias input: mean 1 and variance 0.
        mean_in = T.concatenate([m_w_previous, T.alloc(1, 1)], 0)
        var_in = T.concatenate([v_w_previous, T.alloc(0, 1)], 0)

        # Moments after the linear operation.
        m_linear = T.dot(self.m_w, mean_in) / T.sqrt(self.n_inputs)
        var_sum = (T.dot(self.v_w, var_in) +
                   T.dot(self.m_w**2, var_in) +
                   T.dot(self.v_w, mean_in**2))
        v_linear = var_sum / self.n_inputs

        if not self.non_linear:
            return (m_linear, v_linear)

        # Moments after the ReLU activation.
        std_linear = T.sqrt(v_linear)
        alpha = m_linear / std_linear

        # Use Network_layer.gamma while -alpha < 30; beyond that switch to the
        # closed-form expression below.
        gamma_exact = Network_layer.gamma(-alpha)
        gamma_approx = -alpha - 1.0 / alpha + 2.0 / alpha**3
        use_exact = T.lt(-alpha, T.fill(alpha, 30))
        gamma_final = T.switch(use_exact, gamma_exact, gamma_approx)

        v_aux = m_linear + std_linear * gamma_final

        m_a = Network_layer.n_cdf(alpha) * v_aux
        first_term = m_a * v_aux * Network_layer.n_cdf(-alpha)
        second_term = (Network_layer.n_cdf(alpha) * v_linear *
                       (1 - gamma_final * (gamma_final + alpha)))
        v_a = first_term + second_term

        return (m_a, v_a)
Exemple #40
0
    def _collect_samples(self, y):
        """
        Collect ``self.policy.N`` sample paths using the current policy.

        :param y: target labels, indexed as ``y[b]`` for every ``b`` in
            ``range(self.policy.n_batch)``.
        :return: tuple ``(locs, means, preds, returns)``. Each element is the
            stack of the N per-sample tensors with its first two axes swapped,
            so the sample axis comes second. ``returns`` holds the cumulative
            sum of the per-step rewards along axis 1.
            NOTE(review): the original notes give (n_batch, N, T, 2) for the
            locations -- confirm against ``policy.step_forward``.
        """
        # Repeat each target label n_steps times so it lines up with the
        # prediction made at every step of a path.
        y_rep = T.stack(
            [
                T.fill(T.zeros((self.policy.n_steps)), y[b])
                for b in range(self.policy.n_batch)
            ],
            axis=0)

        def _sample_axis_second(tensors):
            # Stack once, then swap the first two axes; the rest keep their order.
            stacked = T.stack(tensors)
            return stacked.dimshuffle(1, 0, *range(2, stacked.ndim))

        means = []
        locs = []
        returns = []
        preds = []
        for _sample in range(self.policy.N):
            # The third and fourth outputs of step_forward are not needed here.
            loc_means_t, locs_t, _, _, p_ts = self.policy.step_forward()
            locs.append(locs_t)
            means.append(loc_means_t)
            # NOTE(review): np.argmax is applied to the policy output as in the
            # original code; confirm it behaves as intended if p_ts is symbolic.
            pred = np.argmax(p_ts, axis=2)
            preds.append(pred)
            rewards = self._acc_score(pred, y_rep)
            returns.append(cumsum(rewards, axis=1))

        return (_sample_axis_second(locs), _sample_axis_second(means),
                _sample_axis_second(preds), _sample_axis_second(returns))
Exemple #41
0
def myMask(input_t, mask, binarize=False, clip=True):
    """Mask a Theano tensor, then optionally binarize or clip the result.

    Same as myMaskArr but with Theano tensors. Steps:
        1) multiply *input_t* element-wise by *mask* (the thresholding against
           `settings.THRESHOLD` and the division by the maximum are currently
           disabled);
        2) if *binarize* is True, fill the result with EPS(0) and write
           1.0 - EPS(0) at ``[0, 0, argmax over axis 2, arange(shape[3])]``;
        3) otherwise, if *clip* is True, clip the masked tensor between EPS(0)
           and 1 - EPS(0).
    EPS(0) is used instead of 0 to avoid NaNs in the cross-entropy loss.
    *clip* is ignored when *binarize* is True.

    RETURNS :
        theano tensor
    """
    assert (input_t.ndim == 4 and mask.ndim == 4 and binarize) or (not binarize),\
        "input and mask MUST be 4D tensors to the end of binarization"

    product = mask * input_t

    if binarize:
        background = T.fill(product, EPS(0))
        row_of_max = product.argmax(axis=2)
        col_index = T.arange(product.shape[3])
        return T.set_subtensor(background[0, 0, row_of_max, col_index],
                               1.0 - EPS(0))

    if clip:
        return product.clip(EPS(0), 1 - EPS(0))

    return product
Exemple #42
0
    def test_exp_over_1_plus_exp(self):
        """Check which exp-based expressions compile down to ``sigmoid``.

        Every case compiles an expression of ``x`` (with elemwise fusion
        excluded), compares the ops of the compiled graph with an op list and
        then runs the function on random data. The ``identify_1pexp_bug``
        warning is switched off for the duration and restored afterwards.
        """
        m = self.get_mode(excluding=["local_elemwise_fusion"])

        x = tt.vector()
        data = np.random.rand(54).astype(config.floatX)

        def expect(expr, ops, rewritten):
            # Compile, compare the op sequence (equal when `rewritten`,
            # different otherwise), then execute once.
            fn = theano.function([x], expr, mode=m)
            found = [node.op for node in fn.maker.fgraph.toposort()]
            if rewritten:
                assert found == ops
            else:
                assert found != ops
            fn(data)

        backup = config.warn__identify_1pexp_bug
        config.warn__identify_1pexp_bug = False
        try:
            # tests exp_over_1_plus_exp
            expect(tt.exp(x) / (1 + tt.exp(x)), [sigmoid], True)
            expect(tt.exp(x) / (2 + tt.exp(x)), [sigmoid], False)
            expect(tt.exp(x) / (1 - tt.exp(x)), [sigmoid], False)
            expect(tt.exp(x + 1) / (1 + tt.exp(x)), [sigmoid], False)

            # tests inv_1_plus_exp
            # todo: solve issue #4589 first, then also check the stack trace
            # with check_stack_trace(f, ops_to_check=sigmoid)
            expect(tt.fill(x, 1.0) / (1 + tt.exp(-x)), [sigmoid], True)
            expect(tt.fill(x, 1.0) / (2 + tt.exp(-x)), [sigmoid], False)
            expect(tt.fill(x, 1.0) / (1 - tt.exp(-x)), [sigmoid], False)
            expect(tt.fill(x, 1.1) / (1 + tt.exp(-x)), [sigmoid], False)

            # tests inv_1_plus_exp with neg
            # todo: solve issue #4589 first, then also check the stack trace
            # with check_stack_trace(f, ops_to_check=[sigmoid, neg_inplace])
            expect(
                tt.fill(x, -1.0) / (1 + tt.exp(-x)), [sigmoid, neg_inplace], True
            )
            expect(
                tt.fill(x, -1.0) / (1 - tt.exp(-x)), [sigmoid, neg_inplace], False
            )
            expect(
                tt.fill(x, -1.0) / (2 + tt.exp(-x)), [sigmoid, neg_inplace], False
            )
            expect(
                tt.fill(x, -1.1) / (1 + tt.exp(-x)), [sigmoid, neg_inplace], False
            )

            # tests double inv_1_plus_exp with neg
            # (-1)(exp(x)) / (1+exp(x))(1+exp(-x))
            # = (-1)/(1+exp(-x)) * exp(x)/(1+exp(x))
            # = - (sigm(x) * sigm(x))
            # todo: solve issue #4589 first, then also check the stack trace
            # with check_stack_trace(f, ops_to_check=[sigmoid, tt.mul])
            expect(
                (tt.fill(x, -1.0) * tt.exp(x)) / ((1 + tt.exp(x)) * (1 + tt.exp(-x))),
                [sigmoid, tt.mul],
                True,
            )
            expect(
                (tt.fill(x, -1.1) * tt.exp(x)) / ((1 + tt.exp(x)) * (1 + tt.exp(-x))),
                [sigmoid, tt.mul, neg_inplace],
                False,
            )
            expect(
                (tt.fill(x, -1.0) * tt.exp(x)) / ((2 + tt.exp(x)) * (1 + tt.exp(-x))),
                [sigmoid, tt.mul, neg_inplace],
                False,
            )
            expect(
                (tt.fill(x, -1.0) * tt.exp(x)) / ((1 + tt.exp(x)) * (2 + tt.exp(-x))),
                [sigmoid, tt.mul, neg_inplace],
                False,
            )
            expect(
                (tt.fill(x, -1.0) * tt.exp(x)) / ((1 + tt.exp(x)) * (1 + tt.exp(x))),
                [sigmoid, tt.mul, neg_inplace],
                False,
            )
            # Same expression as two cases above; kept as in the original test.
            expect(
                (tt.fill(x, -1.0) * tt.exp(x)) / ((1 + tt.exp(x)) * (2 + tt.exp(-x))),
                [sigmoid, tt.mul, neg_inplace],
                False,
            )

        finally:
            # Restore config option.
            config.warn__identify_1pexp_bug = backup
Exemple #43
0
def MvNormalLogp():
    """Build an ``OpFromGraph`` computing the log pdf of a multivariate normal.

    This should be used in MvNormal.logp once Theano#5908 is released.

    The function itself takes no arguments. The returned op takes two inputs:

    cov : tt.matrix
        The covariance matrix.
    delta : tt.matrix
        Array of deviations from the mean.

    The op evaluates to ``-inf`` when the Cholesky factor of ``cov`` has a
    non-positive diagonal entry, and its gradient override then yields NaN.
    ``f`` and ``floatX`` are taken from the enclosing module.
    """
    cov = tt.matrix('cov')
    cov.tag.test_value = floatX(np.eye(3))
    delta = tt.matrix('delta')
    delta.tag.test_value = floatX(np.zeros((2, 3)))

    solve_lower = tt.slinalg.Solve(A_structure='lower_triangular')
    solve_upper = tt.slinalg.Solve(A_structure='upper_triangular')
    cholesky = Cholesky(lower=True, on_error='nan')

    def guarded_cholesky(matrix):
        # Factorise, then swap in an all-ones matrix when the factor has a
        # non-positive diagonal entry so the later solves stay well defined.
        factor = cholesky(matrix)
        factor_diag = tt.nlinalg.diag(factor)
        positive = tt.all(factor_diag > 0)
        safe_factor = tt.switch(positive, factor, tt.fill(factor, 1))
        return safe_factor, factor_diag, positive

    n, k = delta.shape
    n, k = f(n), f(k)
    chol_cov, diag, ok = guarded_cholesky(cov)
    delta_trans = solve_lower(chol_cov, delta.T).T

    norm_term = n * k * tt.log(f(2) * np.pi)
    logdet_term = f(2) * n * tt.sum(tt.log(diag))
    quad_term = (delta_trans ** f(2)).sum()
    result = f(-.5) * (norm_term + logdet_term + quad_term)
    logp = tt.switch(ok, result, -np.inf)

    def dlogp(inputs, gradients):
        # Gradient override: returns [d logp / d cov, d logp / d delta],
        # each scaled by the incoming gradient.
        g_logp, = gradients
        cov_in, delta_in = inputs

        g_logp.tag.test_value = floatX(1.)
        rows, cols = delta_in.shape

        chol_in, _, valid = guarded_cholesky(cov_in)
        whitened = solve_lower(chol_in, delta_in.T).T

        inner = rows * tt.eye(cols) - tt.dot(whitened.T, whitened)
        g_cov = solve_upper(chol_in.T, inner)
        g_cov = solve_upper(chol_in.T, g_cov.T)

        g_delta = solve_upper(chol_in.T, whitened.T).T

        g_cov = tt.switch(valid, g_cov, -np.nan)
        g_delta = tt.switch(valid, g_delta, -np.nan)

        return [-0.5 * g_cov * g_logp, -g_delta * g_logp]

    return theano.OpFromGraph(
        [cov, delta], [logp], grad_overrides=dlogp, inline=True)
Exemple #44
0
def chain_crf_accuracy(energies, targets):
    """
    Decode a chain CRF and compare the decoded labels with the targets.

    :param energies: Theano 4D tensor
        energies of each step. the shape is [batch_size, n_time_steps, num_labels, num_labels],
        where the pad label index is at last.
    :param targets: Theano 2D tensor
        targets in the shape [batch_size, n_time_steps]
    :return: tuple ``(prediction, correct)``
        ``prediction`` has shape [batch_size, length]; ``correct`` is the
        element-wise ``T.eq(prediction, targets)``.
    """

    assert energies.ndim == 4
    assert targets.ndim == 2

    def _forward_step(energies_one_step, prior_pi, prior_pointer):
        """
        One forward step: best score and best previous label per label.

        :param energies_one_step: [batch_size, t, t]
        :param prior_pi: [batch_size, t]
        :param prior_pointer: [batch_size, t] (unused; supplied by scan)
        :return: [pi_t, pointer_t]
        """
        scores = prior_pi.dimshuffle(0, 1, 'x') + energies_one_step
        return [T.max(scores, axis=1), T.argmax(scores, axis=1)]

    def _follow_pointer(pointer, pointer_tp1):
        """
        Look up, per batch row, the previous label stored for ``pointer_tp1``.

        :param pointer: [batch, t]
        :param pointer_tp1: [batch,]
        :return: [batch,]
        """
        batch_rows = T.arange(pointer.shape[0])
        return pointer[batch_rows, pointer_tp1]

    # scan iterates over the leading axis, so go from
    # (n_batch, n_time_steps, num_labels, num_labels) to
    # (n_time_steps, n_batch, num_labels, num_labels).
    time_major = energies.dimshuffle(1, 0, 2, 3)

    # pi at time 0 is the last row at time 0, minus the last column, which
    # belongs to the pad symbol.
    pi_time0 = time_major[0, :, -1, :-1]

    # Drop the pad label's row and column everywhere: the shape becomes
    # [n_time_steps, n_batch, t, t] with t = num_labels - 1.
    time_major = time_major[:, :, :-1, :-1]

    seeds = [pi_time0, T.cast(T.fill(pi_time0, -1), 'int64')]
    forward_outputs, _ = theano.scan(fn=_forward_step,
                                     outputs_info=seeds,
                                     sequences=[time_major[1:]])
    pis, pointers = forward_outputs

    # Best final label, then walk the pointers back to the first step.
    pointer_n = T.argmax(pis[-1], axis=1)
    back_pointers, _ = theano.scan(fn=_follow_pointer,
                                   outputs_info=pointer_n,
                                   sequences=[pointers],
                                   go_backwards=True)

    # The backward walk yields labels last-to-first; flip the time axis.
    # prediction shape [batch_size, length]
    reversed_prediction = T.concatenate(
        [pointer_n.dimshuffle(0, 'x'), back_pointers.dimshuffle(1, 0)], axis=1)
    flipped_index = T.arange(reversed_prediction.shape[1] - 1, -1, -1)
    prediction = reversed_prediction[:, flipped_index]
    return prediction, T.eq(prediction, targets)
  def _InitializeModelThatPredictsCharsMultiSoftmax(self,learning_rate, num_softmaxes=5):
    """Build the multi-softmax model and compile its Theano functions.

    Args:
      learning_rate: learning rate handed to the Nesterov-momentum updates.
      num_softmaxes: number of leading target columns that are predicted.

    Returns:
      Tuple (prediction_layer, train_fn, test_fn, inference_fn).
      train_fn(images, targets) returns the training loss and applies the
      updates; test_fn(images, targets) returns [loss, per-character accuracy,
      whole-sequence accuracy]; inference_fn(images) returns
      [predicted characters, predictions], both computed deterministically.
    """
    images = T.tensor4('image_input')
    print ("num_of_softmax: " + str(num_softmaxes))
    prediction_layer = self._BuildModelToPredictCharsMultiSoftmax(
        images, num_softmaxes=num_softmaxes)

    targets_in = T.imatrix('target_chars_input')
    # Keep the first num_softmaxes columns and flatten them to one vector.
    flat_targets = targets_in[:, :num_softmaxes].reshape(shape=(-1,))

    # Training loss: mean categorical cross-entropy.
    train_output = lasagne.layers.get_output(prediction_layer)
    loss = lasagne.objectives.categorical_crossentropy(
        train_output, flat_targets).mean()

    # Training updates: SGD with Nesterov momentum over trainable parameters.
    trainable = lasagne.layers.get_all_params(prediction_layer, trainable=True)
    updates = lasagne.updates.nesterov_momentum(
        loss, trainable, learning_rate, momentum=0.9)

    # Validation/testing uses a deterministic forward pass, which disables
    # dropout layers.
    test_prediction = lasagne.layers.get_output(prediction_layer, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(
        test_prediction, flat_targets).mean()

    predicted_chars = T.argmax(test_prediction, axis=1)
    hits = T.eq(predicted_chars, flat_targets)
    # Per-character classification accuracy.
    test_acc = T.mean(hits, dtype=theano.config.floatX)

    # Back to one row per example.
    predicted_chars = predicted_chars.reshape(shape=(-1, num_softmaxes))
    hits = hits.reshape(shape=(-1, num_softmaxes))
    # A sequence counts as correct only when all its characters match.
    hits_per_row = T.sum(hits, axis=1, dtype=theano.config.floatX)
    all_matched = T.eq(hits_per_row, T.fill(hits_per_row, num_softmaxes))
    seq_test_acc = T.mean(all_matched, dtype=theano.config.floatX)
    test_prediction = test_prediction.reshape(shape=(-1, num_softmaxes, len(self.CHARS)))

    # One training step on a mini-batch; returns the training loss.
    train_fn = theano.function(
        [images, targets_in],
        loss,
        updates=updates,
        allow_input_downcast=True)

    # Validation loss and both accuracies.
    test_fn = theano.function(
        [images, targets_in],
        [test_loss, test_acc, seq_test_acc],
        allow_input_downcast=True)

    # Prediction only.
    inference_fn = theano.function(
        [images],
        [predicted_chars, test_prediction],
        allow_input_downcast=True)

    return prediction_layer, train_fn, test_fn, inference_fn
  def _InitializeModelThatPredictsAllChars(
      self, learning_rate, bidirectional_rnn=False, use_mask_input=False,
      lstm_layer_units=256):
    """Build the all-characters model and compile its Theano functions.

    Args:
      learning_rate: learning rate handed to the Nesterov-momentum updates.
      bidirectional_rnn: forwarded to _BuildModelToPredictAllChars.
      use_mask_input: when True, an extra integer mask matrix becomes the last
        input of every compiled function and the training loss is the
        mask-weighted sum divided by the mask sum.
      lstm_layer_units: forwarded to _BuildModelToPredictAllChars.

    Returns:
      Tuple (prediction_layer, train_fn, test_fn, inference_fn).
      train_fn returns the training loss and applies the updates; test_fn
      returns [loss, per-character accuracy, whole-sequence accuracy];
      inference_fn returns [predicted characters, predictions].
    """
    image_input = T.tensor4('image_input')
    num_rnn_steps = self.num_rnn_steps
    target_chars_input = T.imatrix('target_chars')
    # Keep the first num_rnn_steps columns and flatten them to one vector.
    target_chars = target_chars_input[:, :num_rnn_steps]
    target_chars = target_chars.reshape(shape=(-1,))

    mask_input_input = None
    mask_input = None
    if use_mask_input:
      mask_input_input = T.imatrix('mask_input')
      mask_input = mask_input_input[:, :num_rnn_steps]
    prediction_layer, l_cnn, l_lstm = self._BuildModelToPredictAllChars(
        image_input, num_rnn_steps=num_rnn_steps, mask_input=mask_input,
        bidirectional_rnn=bidirectional_rnn, lstm_layer_units=lstm_layer_units)

    # Create a loss expression for training, Using cross-entropy loss.
    # The CNN and LSTM outputs come from the same get_output call but are not
    # used further; they get their own names so the layer objects above are
    # not rebound.
    prediction, _cnn_output, _lstm_output = tuple(
        lasagne.layers.get_output([prediction_layer, l_cnn, l_lstm]))
    l_loss = lasagne.objectives.categorical_crossentropy(prediction, target_chars)
    if use_mask_input:
      # Weight every step by its mask and normalise by the mask total.
      l_loss = l_loss.reshape(shape=(-1, num_rnn_steps))
      l_loss *= mask_input
      loss = l_loss.sum() / mask_input.sum()
    else:
      loss = l_loss.mean()

    # Create update expressions for training, i.e., how to modify the
    # parameters at each training step. Here, we'll use Stochastic Gradient
    # Descent (SGD) with Nesterov momentum. The updates rule derives the
    # gradients itself, so no separate theano.grad call is needed.
    params = lasagne.layers.get_all_params(prediction_layer, trainable=True)
    updates = lasagne.updates.nesterov_momentum(
        loss, params, learning_rate, momentum=0.9)

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network,
    # disabling dropout layers.
    # NOTE(review): unlike the training loss, the test loss and accuracies
    # average over every position even when use_mask_input is set -- confirm
    # this is intended.
    test_prediction = lasagne.layers.get_output(prediction_layer, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction,
                                                            target_chars)
    test_loss = test_loss.mean()

    predicted_chars = T.argmax(test_prediction, axis=1)
    correctly_predicted_chars = T.eq(predicted_chars, target_chars)
    # An expression for the classification accuracy:
    test_acc = T.mean(correctly_predicted_chars,
                      dtype=theano.config.floatX)
    predicted_chars = predicted_chars.reshape(shape=(-1, num_rnn_steps))
    correctly_predicted_chars = correctly_predicted_chars.reshape(shape=(-1, num_rnn_steps))
    # A sequence counts as correct only when all its characters match.
    num_chars_matched = T.sum(correctly_predicted_chars, axis=1, dtype=theano.config.floatX)
    seq_test_acc = T.mean(T.eq(num_chars_matched, T.fill(num_chars_matched, num_rnn_steps)),
                          dtype=theano.config.floatX)
    test_prediction = test_prediction.reshape(shape=(-1, num_rnn_steps, len(self.CHARS)))

    mask_input_vec = [mask_input_input] if use_mask_input else []
    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_fn = theano.function(
        [image_input, target_chars_input] + mask_input_vec,
        loss,
        updates=updates,
        allow_input_downcast=True)

    # Compile a second function computing the prediction, validation loss and accuracy:
    test_fn = theano.function([image_input, target_chars_input] + mask_input_vec,
                              [test_loss, test_acc, seq_test_acc],
                              allow_input_downcast=True)

    # Compile a third function computing the prediction.
    inference_fn = theano.function([image_input] + mask_input_vec,
                                   [predicted_chars, test_prediction],
                                   allow_input_downcast=True)

    return prediction_layer, train_fn, test_fn, inference_fn
Exemple #47
0
	def __init__(self, voca_size, hidden_size, lstm_layers_num, learning_rate=0.2):
		"""Build an LSTM encoder-decoder and compile its two Theano functions.

		Args:
			voca_size: vocabulary size; first dimension of the lookup table and
				second dimension of the output projection.
			hidden_size: width of the embeddings and of every LSTM layer.
			lstm_layers_num: number of stacked LSTM layers in the encoder and
				in the decoder.
			learning_rate: step size of the plain gradient-descent updates.

		After construction:
			self._train(ei, em, di, dm, dt) returns [loss, costs] and applies
				the parameter updates.
			self._utter(ei, em, di) returns the token indices produced over
				utils.max_sent_size greedy decoding steps.
		"""
		self.voca_size = voca_size
		self.hidden_size = hidden_size
		self.lstm_layers_num = lstm_layers_num
		self.learning_rate = learning_rate
		self._train = None
		self._utter = None
		self.params = []
		self.encoder_lstm_layers = []
		self.decoder_lstm_layers = []
		# Per-layer encoder outputs hs[-1] / Cs[-1], used to initialise the decoder.
		self.hos = []
		self.Cos = []

		# Symbolic inputs of the graph; replaced through `givens` when compiling.
		encoderInputs, encoderMask = tensor.imatrices(2)
		decoderInputs, decoderMask, decoderTarget = tensor.imatrices(3)

		# Embedding table (shared by encoder and decoder) and output projection.
		self.lookuptable = theano.shared(
			name="Encoder LookUpTable",
			value=utils.init_norm(self.voca_size, self.hidden_size),
			borrow=True
			)
		self.linear = theano.shared(
			name="Linear",
			value=utils.init_norm(self.hidden_size, self.voca_size),
			borrow=True
			)
		self.params += [self.lookuptable, self.linear]    #concatenate
		
		# Encoder: each layer consumes the full output sequence of the previous one.
		#(max_sent_size, batch_size, hidden_size)
		state_below = self.lookuptable[encoderInputs.flatten()].reshape((encoderInputs.shape[0], encoderInputs.shape[1], self.hidden_size))
		for _ in range(self.lstm_layers_num):
			enclstm = LSTM(self.hidden_size)
			self.encoder_lstm_layers += enclstm,    #append
			self.params += enclstm.params    #concatenate
			hs, Cs = enclstm.forward(state_below, encoderMask)
			self.hos += hs[-1],
			self.Cos += Cs[-1],
			state_below = hs

		# Decoder (training graph): layer i starts from the encoder's layer-i state.
		state_below = self.lookuptable[decoderInputs.flatten()].reshape((decoderInputs.shape[0], decoderInputs.shape[1], self.hidden_size))
		for i in range(self.lstm_layers_num):
			declstm = LSTM(self.hidden_size)
			self.decoder_lstm_layers += declstm,    #append
			self.params += declstm.params    #concatenate
			ho, Co = self.hos[i], self.Cos[i]
			state_below, Cs = declstm.forward(state_below, decoderMask, ho, Co)
		decoder_lstm_outputs = state_below

		ei, em, di, dm, dt = tensor.imatrices(5)    #place holders
		#####################################################
		# Training function: masked negative log-likelihood + gradient descent.
		#####################################################
		linear_outputs = tensor.dot(decoder_lstm_outputs, self.linear)
		# Softmax is applied to one slice of the leading axis at a time.
		softmax_outputs, updates = theano.scan(
			fn=lambda x: tensor.nnet.softmax(x),
			sequences=[linear_outputs],
			)

		def _NLL(pred, y, m):
			# Negative log-probability of the target tokens, weighted by the mask.
			return -m * tensor.log(pred[tensor.arange(decoderInputs.shape[1]), y])
		costs, updates = theano.scan(fn=_NLL, sequences=[softmax_outputs, decoderTarget, decoderMask])
		loss = costs.sum() / decoderMask.sum()

		gparams = [tensor.grad(loss, param) for param in self.params]
		# Replaces the `updates` returned by the two scans above.
		updates = [(param, param - self.learning_rate*gparam) for param, gparam in zip(self.params, gparams)]

		self._train = theano.function(
			inputs=[ei, em, di, dm, dt],
			outputs=[loss, costs],
			updates=updates,
			givens={encoderInputs:ei, encoderMask:em, decoderInputs:di, decoderMask:dm, decoderTarget:dt}
			)
		#####################################################
		# Generation function: greedy decoding for utils.max_sent_size steps.
		#####################################################
		hs0, Cs0 = tensor.as_tensor_variable(self.hos, name="hs0"), tensor.as_tensor_variable(self.Cos, name="Cs0")
		# decoderInputs only provides the shape here: the first tokens are all
		# utils.idx_start and the mask is all ones.
		token_idxs = tensor.fill( tensor.zeros_like(decoderInputs, dtype="int32"), utils.idx_start)
		msk = tensor.fill( (tensor.zeros_like(decoderInputs, dtype="int32")), 1)

		def _step(token_idxs, hs_, Cs_):
			# One decoding step: embed the previous tokens, run every decoder
			# layer from its previous state, then take the argmax token.
			hs, Cs = [], []
			state_below = self.lookuptable[token_idxs].reshape((decoderInputs.shape[0], decoderInputs.shape[1], self.hidden_size))
			for i, lstm in enumerate(self.decoder_lstm_layers):
				h, C = lstm.forward(state_below, msk, hs_[i], Cs_[i])    #mind msk
				hs += h[-1],
				Cs += C[-1],
				state_below = h
			hs, Cs = tensor.as_tensor_variable(hs), tensor.as_tensor_variable(Cs)
			next_token_idx = tensor.cast( tensor.dot(state_below, self.linear).argmax(axis=-1), "int32" )
			return next_token_idx, hs, Cs

		outputs, updates = theano.scan(
			fn=_step,
			outputs_info=[token_idxs, hs0, Cs0],
			n_steps=utils.max_sent_size
			)
		# First scan output: the token indices chosen at every step.
		listof_token_idx = outputs[0]
		self._utter = theano.function(
			inputs=[ei, em, di],
			outputs=listof_token_idx,
			givens={encoderInputs:ei, encoderMask:em, decoderInputs:di}
			#givens={encoderInputs:ei, encoderMask:em}
			)
Exemple #48
0
    def test_exp_over_1_plus_exp(self):
        """Check which exp-based expressions compile down to the sigmoid op.

        Each case compiles an expression of a single vector input with the
        mode returned by ``self.get_mode(excluding=['local_elemwise_fusion'])``,
        compares the ops of the compiled graph (in topological order) with an
        expected op list, and then evaluates the compiled function on random
        data.  ``config.warn.identify_1pexp_bug`` is set to False while the
        cases run and restored afterwards.
        """
        mode = self.get_mode(excluding=['local_elemwise_fusion'])

        x = T.vector()
        data = numpy.random.rand(54).astype(config.floatX)

        def compile_and_list_ops(expr):
            # Compile ``expr`` as a function of ``x`` and collect the ops of
            # the resulting graph in topological order.
            fn = theano.function([x], expr, mode=mode)
            return fn, [node.op for node in fn.maker.fgraph.toposort()]

        def expect_ops(expr, ops):
            # The compiled graph must consist of exactly ``ops``.
            fn, found = compile_and_list_ops(expr)
            assert found == ops
            fn(data)

        def expect_other_ops(expr, ops):
            # The compiled graph must differ from ``ops``.
            fn, found = compile_and_list_ops(expr)
            assert found != ops
            fn(data)

        saved_flag = config.warn.identify_1pexp_bug
        config.warn.identify_1pexp_bug = False
        try:
            # exp(x) / (1 + exp(x)) and near misses.
            sigmoid_only = [sigmoid]
            expect_ops(T.exp(x) / (1 + T.exp(x)), sigmoid_only)
            expect_other_ops(T.exp(x) / (2 + T.exp(x)), sigmoid_only)
            expect_other_ops(T.exp(x) / (1 - T.exp(x)), sigmoid_only)
            expect_other_ops(T.exp(x + 1) / (1 + T.exp(x)), sigmoid_only)

            # 1 / (1 + exp(-x)) and near misses.
            expect_ops(T.fill(x, 1.0) / (1 + T.exp(-x)), sigmoid_only)
            expect_other_ops(T.fill(x, 1.0) / (2 + T.exp(-x)), sigmoid_only)
            expect_other_ops(T.fill(x, 1.0) / (1 - T.exp(-x)), sigmoid_only)
            expect_other_ops(T.fill(x, 1.1) / (1 + T.exp(-x)), sigmoid_only)

            # -1 / (1 + exp(-x)): sigmoid followed by an in-place negation.
            sigmoid_then_neg = [sigmoid, theano.tensor.inplace.neg_inplace]
            expect_ops(T.fill(x, -1.0) / (1 + T.exp(-x)), sigmoid_then_neg)
            expect_other_ops(T.fill(x, -1.0) / (1 - T.exp(-x)),
                             sigmoid_then_neg)
            expect_other_ops(T.fill(x, -1.0) / (2 + T.exp(-x)),
                             sigmoid_then_neg)
            expect_other_ops(T.fill(x, -1.1) / (1 + T.exp(-x)),
                             sigmoid_then_neg)

            # Product of two sigmoid-shaped factors with a negative sign:
            # (-1)(exp(x)) / (1+exp(x))(1+exp(-x))
            # = (-1)/(1+exp(-x)) * exp(x)/(1+exp(x))
            # = - (sigm(x) * sigm(x))
            numerator = T.fill(x, -1.0) * T.exp(x)
            expect_ops(numerator / ((1 + T.exp(x)) * (1 + T.exp(-x))),
                       [sigmoid, T.mul])

            # NOTE(review): the negative cases below compare against a
            # three-op list, whereas the positive case above expects
            # [sigmoid, T.mul]; confirm this reference list is intended.
            sigmoid_mul_neg = [sigmoid, T.mul,
                               theano.tensor.inplace.neg_inplace]
            expect_other_ops((T.fill(x, -1.1) * T.exp(x)) /
                             ((1 + T.exp(x)) * (1 + T.exp(-x))),
                             sigmoid_mul_neg)
            expect_other_ops((T.fill(x, -1.0) * T.exp(x)) /
                             ((2 + T.exp(x)) * (1 + T.exp(-x))),
                             sigmoid_mul_neg)
            expect_other_ops((T.fill(x, -1.0) * T.exp(x)) /
                             ((1 + T.exp(x)) * (2 + T.exp(-x))),
                             sigmoid_mul_neg)
            expect_other_ops((T.fill(x, -1.0) * T.exp(x)) /
                             ((1 + T.exp(x)) * (1 + T.exp(x))),
                             sigmoid_mul_neg)
            # NOTE(review): same expression as two cases above; kept so the
            # set of compiled cases is unchanged.
            expect_other_ops((T.fill(x, -1.0) * T.exp(x)) /
                             ((1 + T.exp(x)) * (2 + T.exp(-x))),
                             sigmoid_mul_neg)

        finally:
            # Put the warning flag back to its previous value.
            config.warn.identify_1pexp_bug = saved_flag