Example #1
def compute_sym_blk_tridiag_inv_b(S, D, b):
    '''
    Symbolically solve Cx = b for x, where C is assumed to be a *symmetric*
    block tridiagonal matrix.

    Input:
    S  - (T-1 x n x n) intermediate matrices returned by
         the function compute_sym_blk_tridiag
    D  - (T x n x n) diagonal blocks of the inverse
    b  - (T x n) right-hand side

    Output:
    x - (T x n) solution of Cx = b

    From:
    Jain et al., 2006
    "Numerically Stable Algorithms for Inversion of Block Tridiagonal and Banded Matrices"

    (c) Evan Archer, 2015
    '''
    nT = T.shape(b)[0]
    d = T.shape(b)[1]
    initp = T.zeros([d], dtype=theano.config.floatX)
    inity = T.zeros([d], dtype=theano.config.floatX)
    initq = T.zeros([d], dtype=theano.config.floatX)

    def compute_p(idx, pp, b, S):
        pm = ifelse(T.eq(idx, nT - 1), b[-1],
                    b[idx] + T.dot(S[T.max([-idx - 1, -nT + 1])], pp))
        return pm

    p, updates = theano.scan(compute_p,
                             sequences=[T.arange(nT - 1, -1, -1)],
                             outputs_info=initp,
                             non_sequences=[b, S])

    def compute_q(idx, qm, b, S, D):
        qp = ifelse(
            T.eq(idx, 0), T.dot(T.dot(T.transpose(S[-1]), D[0]), b[0]),
            T.dot(T.transpose(S[-idx - 1]), qm + T.dot(D[idx], b[idx])))
        return qp

    q, updates_q = theano.scan(compute_q,
                               sequences=[T.arange(nT - 1)],
                               outputs_info=p[0],
                               non_sequences=[b, S, D])

    def compute_y(idx, p, q, S, D):
        yi = ifelse(
            T.eq(idx, 0), T.dot(D[0], p[-1]),
            ifelse(T.eq(idx, nT - 1),
                   T.dot(D[-1], p[0]) + q[-1],
                   T.dot(D[idx], p[-idx - 1]) + q[idx - 1]))
        return yi

    y, updates_y = theano.scan(compute_y,
                               sequences=[T.arange(nT)],
                               outputs_info=None,
                               non_sequences=[p, q, S, D])

    #return [y, updates_q+updates+y]
    return y
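A minimal usage sketch (not from the original source): it assumes the example's own imports (theano, theano.tensor as T, ifelse) and that S and D were already produced by the companion function compute_sym_blk_tridiag, with the shapes listed in the docstring.

# Hypothetical usage of the function above.
import theano
import theano.tensor as T

S = T.tensor3('S')   # (T-1) x n x n intermediate blocks from compute_sym_blk_tridiag
D = T.tensor3('D')   # T x n x n diagonal blocks of the inverse
b = T.matrix('b')    # T x n right-hand side

x = compute_sym_blk_tridiag_inv_b(S, D, b)
solve = theano.function([S, D, b], x)   # solve(S_val, D_val, b_val) gives the T x n solution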
Example #2
def RecurrentPredictor(X,drop_prob,mask=None):
    batch_size = T.shape(X)[0]
    seq_len = T.shape(X)[1]
    emb_phons = lib.ops.dropout(lib.ops.Embedding(
        'DurationPredictor.Embedding_Phonemes',
        V,
        256,
        X
    ),drop_prob)

    gru = lib.ops.dropout(lib.ops.BiGRU(
        'DurationPredictor.BiGRU',
        emb_phons,
        256,
        256,
        mask=mask
    ),drop_prob)

    out = T.nnet.relu(lib.ops.Linear(
        'DurationPredictor.FC',
        gru,
        512,
        1
    ))[:,:,0]

    return out
Example #3
    def regularizer_cost(self, L_t):
        """ Penalize if the expected set size is far from the target value

        :type L_t: T.matrix
        :param L_t: The Ensemble matrix

        :type return value: T.fscalar
        :param return value: The Cost
        """

        num_sents = T.shape(L_t)[0]

        # Calculate expectation value
        #eigenvalues, _ = T.nlinalg.eigh(L_t)
        #expected_set_size = T.dot(eigenvalues, 1 / (eigenvalues + 1))
        K = T.eye(num_sents) - T.nlinalg.matrix_inverse(L_t + T.eye(num_sents))
        expected_set_size = T.nlinalg.trace(K)

        # We need to scale 'expected_set_size' because we remove empty sets while sampling.
        num_sents = T.shape(L_t)[0]
        L_I = L_t + T.eye(num_sents)
        det_L_I = Det()(L_I)
        factor = det_L_I / (det_L_I - 1.0)
        expected_set_size *= factor

        return 2 * (expected_set_size - self.args.num_target_sentences
                    ) * self.args.regularizer, expected_set_size
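A small NumPy check (not part of the class above) of the identity the cost relies on: the expected DPP set size tr(I - (L + I)^(-1)) equals the eigenvalue form sum_i lambda_i / (lambda_i + 1) left commented out in the code.

import numpy as np

rng = np.random.default_rng(0)
A = rng.standard_normal((5, 5))
L = A @ A.T                                   # a symmetric PSD ensemble matrix
lam = np.linalg.eigvalsh(L)
via_eigenvalues = np.sum(lam / (lam + 1.0))   # the commented-out formulation
via_trace = np.trace(np.eye(5) - np.linalg.inv(L + np.eye(5)))
assert np.isclose(via_eigenvalues, via_trace)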
Example #4
def neg_log_normal_mixture_likelihood(true, parameters):

    NT = T.shape(true)[0]
    D = T.shape(true)[1]
    M = T.shape(parameters)[1] // (D + D**2 + 1)
    means, sigmas, weights = mapping(true, parameters)
    two_pi = 2 * np.pi
    log2pi = np.log(two_pi)

    def log_single_data_point(i, means, sigmas, weights, true):
        mu = means[i, :, :]
        P = sigmas[i, :, :, :]
        al = weights[i, :]
        tr = true[i, :]

        def log_single_component(c, mu, P, al, tr):
            L = T.tril(P[c, :, :], k=-1) + T.diag(T.exp(T.diagonal(
                P[c, :, :])))
            z = T.exp(-0.5 * T.sum(T.dot(T.transpose(L), (tr - mu[c, :]))**2) +
                      T.log(al[c]) + T.log(T.nlinalg.det(L)) - D * log2pi / 2.)
            return z

        z, _ = theano.scan(fn=log_single_component,
                           sequences=T.arange(M),
                           non_sequences=[mu, P, al, tr])
        return T.log(T.sum(z) + 1e-44)

    Z, _ = theano.scan(fn=log_single_data_point,
                       sequences=T.arange(NT),
                       non_sequences=[means, sigmas, weights, true])
    return -T.mean(Z)
Example #5
    def sample_h_given_v(self, v0_sample):
        ''' This function infers state of hidden units given visible units '''
        # compute the activation of the hidden units given a sample of
        # the visibles
        pre_sigmoid_h1, h1_mean = self.propup(v0_sample)

        # LARGER mu IS MORE SPARSE.
        mu = 0.000001  # mu = 0.01 is probably too small.
        # LOOKED AT THE CODE HERE: http://lrn2cre8.ofai.at/lrn2/doc/_modules/lrn2/models/srbm_goh.html#SRBM_Goh
        ## DAN ADDED:#########################
        rank_0 = ((h1_mean.argsort(axis=0)).argsort(axis=0).astype(
            theano.config.floatX) + 1.) / T.shape(h1_mean)[0].astype(
                theano.config.floatX)

        rank_1 = ((h1_mean.argsort(axis=1)).argsort(axis=1).astype(
            theano.config.floatX) + 1.) / T.shape(h1_mean)[1].astype(
                theano.config.floatX)

        h1_mean = (1. - 0.5) * (rank_0**((1. / mu) - 1.)) + 0.5 * (rank_1**(
            (1. / mu) - 1.))

        #pre_sigmoid_h1_bin = T.log(h1_mean) - T.log(1. - h1_mean)
        #pre_sigmoid_h1 = pre_sigmoid_h1_bin
        #######################################

        # get a sample of the hiddens given their activation
        # Note that theano_rng.binomial returns a symbolic sample of dtype
        # int64 by default. If we want to keep our computations in floatX
        # for the GPU we need to specify to return the dtype floatX
        h1_sample = self.theano_rng.binomial(size=h1_mean.shape,
                                             n=1,
                                             p=h1_mean,
                                             dtype=theano.config.floatX)
        return [pre_sigmoid_h1, h1_mean, h1_sample]
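The double argsort applied to h1_mean is the usual rank trick; a tiny NumPy illustration (plain NumPy standing in for the symbolic tensors):

import numpy as np

h = np.array([0.3, 0.9, 0.1, 0.5])
ranks = h.argsort().argsort()          # [1, 3, 0, 2]: each element's 0-based rank
scaled = (ranks + 1.0) / h.shape[0]    # ranks scaled into (0, 1], as in rank_0/rank_1 above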
Example #6
def get_sensi_speci(y_hat, y):
    # y_hat = T.concatenate(T.sum(input=y_hat[:, 0:2], axis=1), T.sum(input=y_hat[:, 2:], axis=1))
    y_hat = T.stacklists([y_hat[:, 0] + y_hat[:, 1], y_hat[:, 2] + y_hat[:, 3] + y_hat[:, 4]]).T
    y_hat = T.argmax(y_hat, axis=1)

    tag = 10 * y_hat + y
    tneg = T.cast((T.shape(tag[(T.eq(tag, 0.)).nonzero()]))[0], config.floatX)
    fneg = T.cast((T.shape(tag[(T.eq(tag, 1.)).nonzero()]))[0], config.floatX)
    fpos = T.cast((T.shape(tag[(T.eq(tag, 10.)).nonzero()]))[0], config.floatX)
    tpos = T.cast((T.shape(tag[(T.eq(tag, 11.)).nonzero()]))[0], config.floatX)



    # assert fneg + fneg + fpos + tpos == 1380
    # tneg.astype(config.floatX)
    # fneg.astype(config.floatX)
    # fpos.astype(config.floatX)
    # tpos.astype(config.floatX)

    speci = ifelse(T.eq((tneg + fpos), 0), np.float64(float('inf')), tneg / (tneg + fpos))
    sensi = ifelse(T.eq((tpos + fneg), 0), np.float64(float('inf')), tpos / (tpos + fneg))

    # keng die!!!
    # if T.eq((tneg + fpos), 0):
    #     speci = float('inf')
    # else:
    #     speci = tneg // (tneg + fpos)
    # if T.eq((tpos + fneg), 0.):
    #     sensi = float('inf')
    # else:
    #     sensi = tpos // (tpos + fneg)

    # speci.astype(config.floatX)
    # sensi.astype(config.floatX)
    return [sensi, speci]
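A plain-NumPy sketch (illustration, not from the source) of the tag encoding used above: with binary predictions and labels, tag = 10*y_hat + y takes the values 0 (TN), 1 (FN), 10 (FP) and 11 (TP).

import numpy as np

y_hat = np.array([0, 1, 1, 0, 1])
y     = np.array([0, 1, 0, 1, 1])
tag = 10 * y_hat + y
tpos = np.sum(tag == 11); tneg = np.sum(tag == 0)
fpos = np.sum(tag == 10); fneg = np.sum(tag == 1)
sensi = tpos / (tpos + fneg)   # 2/3
speci = tneg / (tneg + fpos)   # 1/2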
Example #7
    def activation(self, conv_out):

        conv_out = T.reshape(
            conv_out,
            (T.shape(conv_out)[0], T.shape(conv_out)[1] // self.n_pieces,
             self.n_pieces, T.shape(conv_out)[2], T.shape(conv_out)[3]))
        return T.max(conv_out, axis=2)
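The reshape-then-max pattern above is maxout over groups of n_pieces feature maps; a toy NumPy equivalent (illustrative only):

import numpy as np

n_pieces = 2
conv_out = np.arange(12.).reshape(1, 6, 1, 1)                 # (batch, channels, h, w)
grouped = conv_out.reshape(1, 6 // n_pieces, n_pieces, 1, 1)
maxed = grouped.max(axis=2)                                   # (1, 3, 1, 1): max of each channel pair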
Example #8
    def bias_h(self, v_in):
        """
        Calculate latent activation biases, combined for sparsity and
        selectivity.
        """
        h_act = self.activation_h(v_in)

        h_act = h_act.dimshuffle(1,0,2,3)
        shape_before = h_act.shape
        h_act = h_act.reshape((h_act.shape[0], -1))

        rank_0 = ((h_act.argsort(axis=0)
                   ).argsort(axis=0).astype(fx) + 1.
                  ) / T.shape(h_act)[0].astype(fx)

        rank_1 = ((h_act.argsort(axis=1)
                   ).argsort(axis=1).astype(fx) + 1.
                  ) / T.shape(h_act)[1].astype(fx)

        # Interpolate towards the average of the sparsity and selectivity bias
        # matrices.
        lat_act = (1. - self.interp) * (rank_0 ** ((1. / self.mu) - 1.)) \
                   + self.interp * (rank_1 ** ((1. / self.mu) - 1.))

        lat_act = lat_act.reshape(shape_before)
        lat_act = lat_act.dimshuffle(1,0,2,3)

        # inverse of sigmoid
        lat_act_logit = T.log(lat_act) - T.log(1. - lat_act)

        return lat_act_logit
Example #9
 def call(self, x, mask=None):
     R = T.reshape(
         x, (T.shape(x)[0], T.shape(x)[1] / self.OneOnX, self.OneOnX))
     M = K.max(R, axis=(2), keepdims=True)
     R = K.switch(K.equal(R, M), R, 0.)
     R = T.reshape(R, (T.shape(x)[0], T.shape(x)[1]))
     return R
Example #10
def DeepVoice(X, drop_prob):
    batch_size = T.shape(X)[0]
    seq_len = T.shape(X)[1]
    emb_phons = T.extra_ops.to_one_hot(X.flatten(),V).reshape((batch_size,-1,V))
    out = T.nnet.relu(lib.ops.Linear(
        'DurationPredictor.FC.1',
        emb_phons,
        V,
        256
    ))

    out = lib.ops.dropout(T.nnet.relu(lib.ops.Linear(
        'DurationPredictor.FC.2',
        out,
        256,
        256
    )),drop_prob)

    out = lib.ops.dropout(lib.ops.RNN(
        'GRU',
        'DurationPredictor.GRU',
        out,
        256,
        128,
        n_layers=2,
        residual=False
    )[:,:,-1],drop_prob)

    out = lib.ops.Linear(
        'DurationPredictor.FC.3',
        out,
        128,
        1
    )[:,:,0]
    return out
Example #11
    def get_output_for(self, upscaled, **kwargs):
        a, b = self.scale_factor
        # get output for pooling and pre-pooling layer
        inp, out =\
                lasagne.layers.get_output([self.pool2d_layer_in,
                                           self.pool2d_layer])
        # upscale the input feature map by scale_factor
        if b > 1:
            upscaled = T.extra_ops.repeat(upscaled, b, 3)
        if a > 1:
            upscaled = T.extra_ops.repeat(upscaled, a, 2)
        # get the shapes for pre-pooling layer and upscaled layer
        sh_pool2d_in = T.shape(inp)
        sh_upscaled = T.shape(upscaled)
        # in case the shape is different left-bottom-pad with zero
        tmp = T.zeros(sh_pool2d_in)
        indx = (slice(None), slice(None), slice(0, sh_upscaled[2]),
                slice(0, sh_upscaled[3]))
        upscaled = T.set_subtensor(tmp[indx], upscaled)
        # get max pool indices
        indices_pool = T.grad(None,
                              wrt=inp,
                              known_grads={out: T.ones_like(out)})
        # mask values using indices_pool
        f = indices_pool * upscaled

        return f
Example #12
 def get_output_for(self, inputs, **kwargs):
     #input = (batch,channels,14,14)
     #boxes = (batch,num_boxes,5)
     #out = (batch,channels,num_boxes)
     # For each ROI R = [batch_index x1 y1 x2 y2]: max pool over R
     input = inputs[0]
     boxes = inputs[1]
     #assert(input.shape[0]==boxes.shape[0])
     batch = T.shape(input)[0]
     channels = T.shape(input)[1]
     height = T.shape(input)[2]
     width = T.shape(input)[3]
     num_boxes = T.shape(boxes)[1]  #/batch
     _boxes = boxes.dimshuffle((2, 1, 0)).reshape(
         (5, num_boxes * batch)
     ).dimshuffle(
         (1, 0)
     )  #((boxes.dimshuffle((2,1,0))).reshape((5,num_boxes*batch))).dimshuffle((1,0))#boxes#.T.reshape((5,num_boxes*batch)).T
     #for bt in range(batch):
     #    _boxes[bt*num_boxes:(bt+1)*num_boxes,0]=bt
     #output = T.zeros((batch * num_boxes , channels, self.num_features))
     op = ROIPoolingOp(pooled_h=self.pool_dims,
                       pooled_w=self.pool_dims,
                       spatial_scale=self.sp_scale)
     output = op(
         input, _boxes
     )  #num_boxes*batch,channels,height,width --> batch,channels*height*width,num_boxes
     #output = output[0].reshape((batch,num_boxes,channels*self.pool_dims*self.pool_dims)).dimshuffle((0,2,1))
     #output = output[0].reshape((batch*num_boxes,channels*self.pool_dims*self.pool_dims))
     #output = output.dimshuffle((1,0)).reshape((channels*self.pool_dims*self.pool_dims,num_boxes,batch)).dimshuffle((2,0,1))
     return output[0]
Example #13
    def binarize_weights_a(self, W, eval):

        if self.binary_training == True:
            if self.stochastic_training == True:
                p = self.hard_sigm(W / self.W0_a)
                srng = theano.sandbox.rng_mrg.MRG_RandomStreams(
                    self.rng.randint(999998))
                p_mask = T.cast(srng.binomial(n=1, p=p, size=T.shape(W)),
                                theano.config.floatX)
                Wb = T.switch(p_mask, self.W0_a, -self.W0_a)
            else:
                Wb = T.switch(T.ge(W, 0), self.W0_a, -self.W0_a)
        elif self.ternary_training == True:
            if self.stochastic_training == True:
                w_sign = T.gt(W, 0) * 2 - 1
                p = self.clipped_v(W / self.W0_a)
                srng = theano.sandbox.rng_mrg.MRG_RandomStreams(
                    self.rng.randint(999998))
                Wb = self.W0_a * w_sign * T.cast(
                    srng.binomial(n=1, p=p, size=T.shape(W)),
                    theano.config.floatX)
            else:
                larger_than_neg_0_5 = T.gt(W, -self.W0_a / 3)
                larger_than_pos_0_5 = T.gt(W, self.W0_a / 3)
                W_val = larger_than_neg_0_5 * 1 + larger_than_pos_0_5 * 1 - 1
                Wb = W_val * self.W0_a
        else:
            Wb = W

        return Wb
Example #14
    def sample_h_given_v(self, v0_sample):
        ''' This function infers state of hidden units given visible units '''
        # compute the activation of the hidden units given a sample of
        # the visibles
        pre_sigmoid_h1, h1_mean = self.propup(v0_sample)

        ##################################################################
        ## Sparsity: #####################################################
        ##################################################################
        rank_0 = ((h1_mean.argsort(axis=0)).argsort(axis=0).astype(
            theano.config.floatX) + 1.) / T.shape(h1_mean)[0].astype(
                theano.config.floatX)
        rank_1 = ((h1_mean.argsort(axis=1)).argsort(axis=1).astype(
            theano.config.floatX) + 1.) / T.shape(h1_mean)[1].astype(
                theano.config.floatX)
        h1_mean = (1. - 0.9) * (rank_0**((1. / 0.99) - 1.)) + 0.9 * (rank_1**(
            (1. / 0.99) - 1.))

        # get a sample of the hiddens given their activation
        # Note that theano_rng.binomial returns a symbolic sample of dtype
        # int64 by default. If we want to keep our computations in floatX
        # for the GPU we need to specify to return the dtype floatX
        h1_sample = self.theano_rng.binomial(size=h1_mean.shape,
                                             n=1,
                                             p=h1_mean,
                                             dtype=theano.config.floatX)
        return [pre_sigmoid_h1, h1_mean, h1_sample]
Example #15
def discrete_grads(loss,network,LR):
    global update_type,best_params,H,N,th # th is a parameter that controls the nonlinearity of state transfer probability

    W_params = lasagne.layers.get_all_params(network, discrete=True) #Get all the weight parameters
    layers = lasagne.layers.get_all_layers(network)
	
    W_grads = []
    for layer in layers:
        params = layer.get_params(discrete=True)
        if params:
            W_grads.append(theano.grad(loss, wrt=layer.W)) #Here layer.W = weight_tune(param)  
    updates = lasagne.updates.adam(loss_or_grads=W_grads,params=W_params,learning_rate=LR)  

    for param, parambest in izip(W_params, best_params) :

        L = 2*H/pow(2,N) #state step length in Z_N 
		
        a=random.random() #a is used to sample c, a binary random variable
        if a<0.85:
           c = 1
        else:
           c = 0
        
        b=random.random()
        state_rand = T.round(b*pow(2,N))*L-H #state_rand is a random state in the discrete weight space Z_N
        
        delta_W1 = c*(state_rand-parambest) #parambest transfers towards state_rand when c == 1, otherwise it stays unmoved
        delta_W1_direction = T.cast(T.sgn(delta_W1),theano.config.floatX)
        dis1 = T.abs_(delta_W1) #the absolute distance
        k1 = delta_W1_direction*T.floor(dis1/L) #the integer part
        v1 = delta_W1-k1*L #the decimal part
        Prob1 = T.abs_(v1/L) #the transfer probability
        Prob1 = T.tanh(th*Prob1) #the nonlinear tanh() function accelerates the state transfer

        delta_W2 = updates[param] - param
        delta_W2_direction = T.cast(T.sgn(delta_W2),theano.config.floatX)
        dis2 = T.abs_(delta_W2) #the absolute distance
        k2 = delta_W2_direction*T.floor(dis2/L) #the integer part
        v2 = delta_W2-k2*L #the decimal part
        Prob2 = T.abs_(v2/L) #the transfer probability
        Prob2 = T.tanh(th*Prob2) #the nonlinear tanh() function accelerates the state transfer
       
        srng = RandomStreams(lasagne.random.get_rng().randint(1, 2147462579))
        Gate1 = T.cast(srng.binomial(n=1, p=Prob1, size=T.shape(Prob1)), theano.config.floatX) # Gate1 is a binary variable with probability of Prob1 to be 1
        Gate2 = T.cast(srng.binomial(n=1, p=Prob2, size=T.shape(Prob2)), theano.config.floatX) # Gate2 is a binary variable with probability of Prob2 to be 1

        delta_W1_new=(k1+delta_W1_direction*Gate1)*L #delta_W1_new = k*L where k is an integer
        updates_param1 = T.clip(parambest + delta_W1_new,-H,H)
        updates_param1 = weight_tune(updates_param1,-H,H) #fine tuning for guaranteeing each element strictly constrained in the discrete space

        delta_W2_new=(k2+delta_W2_direction*Gate2)*L #delta_W2_new = k*L where k is an integer  
        updates_param2 = T.clip(param + delta_W2_new,-H,H)
        updates_param2 = weight_tune(updates_param2,-H,H) #fine tuning for guaranteeing each element strictly constrained in the discrete space

        # if update_type < 100, the weight probabilistically transfers from parambest to state_rand, which helps to search for the global minimum;
        # else it would probabilistically transfer from param to a state nearest to updates[param]
        updates[param] = T.switch(T.lt(update_type,100), updates_param1, updates_param2)
      
    return updates
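A scalar NumPy sketch (illustration only; the tanh acceleration is omitted) of the probabilistic rounding used above: a weight step delta is split into k whole grid steps of size L plus a remainder v, and one extra step is taken with probability |v|/L.

import numpy as np

L = 0.25                                        # grid step in the discrete space
delta = 0.6
direction = np.sign(delta)
k = direction * np.floor(abs(delta) / L)        # 2 whole steps
v = delta - k * L                               # 0.1 remainder
prob = abs(v / L)                               # 0.4 chance of one extra step
gate = float(np.random.rand() < prob)
delta_quantized = (k + direction * gate) * L    # 0.5 or 0.75, always a multiple of L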
Example #16
def RecurrentMapper(ctx):
    emb_ctx = lib.ops.Embedding('Mapper.Generator.Embedding_Context', V,
                                ENC_DIM, ctx)
    batch_size = T.shape(ctx)[0]
    seq_len = T.shape(ctx)[1]
    out = lib.ops.BiGRU('Mapper.Generator.BiGRU', emb_ctx, ENC_DIM, 256)
    readout = lib.ops.Linear('Mapper.Generator.FC', out, 512, EMB_DIM)
    return readout
Example #17
    def batched_thompson_sampling(self, bb_alpha_con, q, lower, upper,
                                  bb_alpha_samples):
        '''
        q = number of samples
        lower = lowest x value in training data
        upper = highest x value in training data (specifying range?)
        '''

        grid_size = 10000
        grid = casting(lower + np.random.rand(grid_size, len(lower)) *
                       (upper - lower))

        def sigmoid(x):
            return 1.0 / (1.0 + T.exp(-x))

        x = T.matrix('x', dtype=theano.config.floatX)

        prediction_probs = T.exp(
            LogSumExp(bb_alpha_con.network.output(self.x), 0) +
            T.log(1.0 / bb_alpha_samples)
        )**30  # 2-D array of size (n_samples, 2), where column 1 gives the probability of the constraint being unsatisfied and column 2 the probability of it being satisfied.

        # 19 November 20:45 prediction_lg is the logistic function applied to the NN output.

        prediction_lg = sigmoid(4.0 *
                                (self.network.output(self.x) - self.y_max) /
                                (self.y_min - self.y_max) - 2.0)
        predict_lg = theano.function([self.x], prediction_lg)
        function_grid_lg = theano.function(
            [self.x], -prediction_lg[0, :, 0] * T.reshape(
                prediction_probs[:, :, 1], [T.shape(self.x)[0], 1])[:, 0])
        function_scalar_lg = theano.function(
            [self.x], -prediction_lg[0, 0, 0] * T.reshape(
                prediction_probs[:, :, 1], [T.shape(self.x)[0], 1])[0, 0])
        function_scalar_gradient_lg = theano.function(
            [self.x],
            T.grad(
                -prediction_lg[0, 0, 0] * T.reshape(
                    prediction_probs[:, :, 1], [T.shape(self.x)[0], 1])[0, 0],
                self.x))

        self.network.update_randomness(grid_size)
        X_numpy = \
        global_optimization(grid, lower, upper, function_grid_lg, function_scalar_lg, function_scalar_gradient_lg)[0]
        for i in range(1, q):
            self.network.update_randomness(grid_size)
            new_point = \
            global_optimization(grid, lower, upper, function_grid_lg, function_scalar_lg, function_scalar_gradient_lg)[
                0]  # new_point.shape = (1,2)
            X_numpy = casting(np.concatenate([X_numpy, new_point], 0))
            print(i, X_numpy)

        samples = self.predict(X_numpy)

        print("Predictive mean at selected points:\n",
              np.mean(samples, 0)[:, 0])

        return X_numpy
Example #18
    def compileActivation(self, net, layerNum):
        variable = net.x if layerNum == 0 else net.varArrayA[layerNum - 1]

        #Calc shapes for reshape function on-the-fly. Assume we have square images as input.
        sX = T.cast(T.sqrt(T.shape(variable)[0] / self.kernel_shape[1]), 'int16')

        #Converts input from 2 to 4 dimensions
        Xr = T.reshape(variable.T, (T.shape(variable)[1], self.kernel_shape[1], sX, sX))

        if self.optimized:
            out_size = T.cast(
                T.ceil((T.shape(Xr)[-1] - T.shape(net.varWeights[layerNum]['w'])[-1] + 1) / np.float32(self.stride)),
                'int32')

            conv_op = FilterActs(stride=self.stride)
            input_shuffled = Xr.dimshuffle(1, 2, 3, 0)  # bc01 to c01b
            filters_shuffled = net.varWeights[layerNum]['w'].dimshuffle(1, 2, 3, 0)  # bc01 to c01b
            filters_flipped = filters_shuffled[:, ::-1, ::-1, :] # flip rows and columns
            contiguous_input = gpu_contiguous(input_shuffled)
            contiguous_filters = gpu_contiguous(filters_flipped *
                                                (net.dropOutVectors[layerNum].dimshuffle('x', 0, 1, 'x') if self.dropout else 1.0))
            a = conv_op(contiguous_input, contiguous_filters)
            a = a[:, :out_size, :out_size, :]
            #Add bias
            a = a + net.varWeights[layerNum]['b'].dimshuffle(0, 'x', 'x', 'x')
        else:
            a = T.nnet.conv2d(Xr, net.varWeights[layerNum]['w'] *
                              (net.dropOutVectors[layerNum].dimshuffle('x', 'x', 0, 1) if self.dropout else 1.0),
                              border_mode='valid',
                              subsample=(self.stride, self.stride))
            #Add bias
            a = a + net.varWeights[layerNum]['b'].dimshuffle('x', 0, 'x', 'x')

        if self.pooling:
            if self.optimized:
                #Pooling
                # ds - side of square pool window
                # stride - Defines the stride size between successive pooling squares.
                # Setting this parameter smaller than sizeX produces overlapping pools.
                # Setting it equal to sizeX gives the usual, non-overlapping pools. Values greater than sizeX are not allowed.
                pool_op = MaxPool(ds=self.pooling_shape, stride=self.pooling_shape)

                contiguous_input = gpu_contiguous(a)
                a = pool_op(contiguous_input)
                a = a.dimshuffle(3, 0, 1, 2)       # c01b to bc01
            else:
                #a = downsample.max_pool_2d(a, (self.pooling_shape, self.pooling_shape), ignore_border=False)
                a = pool.max_pool2D(a, (self.pooling_shape, self.pooling_shape), ignore_border=False)
        else:
            if self.optimized:
                a = a.dimshuffle(3, 0, 1, 2)       # c01b to bc01

        a = T.flatten(a, outdim=2).T

        #Sigmoid
        a = self.activation(a, self.pool_size)

        net.varArrayA.append(a)
Example #19
 def infer_shape(self, node, in_shapes):
   data_shape = T.shape(node.inputs[0])
   rois_shape = T.shape(node.inputs[1])
   batch_size = rois_shape[0]
   num_maps = data_shape[1]
   h = self.pooled_h
   w = self.pooled_w
   out_shape = [batch_size, num_maps, h, w]
   return [out_shape, out_shape]
Example #20
 def infer_shape(self, node, in_shapes):
     data_shape = T.shape(node.inputs[0])
     rois_shape = T.shape(node.inputs[1])
     batch_size = rois_shape[0]
     num_maps = data_shape[1]
     h = self.pooled_h
     w = self.pooled_w
     out_shape = [batch_size, num_maps, h, w]
     return [out_shape, out_shape]
Example #21
    def compileActivation(self, net, layerNum):
        variable = net.x if layerNum == 0 else net.varArrayA[layerNum - 1]

        #Calc shapes for reshape function on-the-fly. Assume we have square images as input.
        sX = T.cast(T.sqrt(T.shape(variable)[0] / self.kernel_shape[1]), 'int16')

        #Converts input from 2 to 4 dimensions
        Xr = T.reshape(variable.T, (T.shape(variable)[1], self.kernel_shape[1], sX, sX))

        if self.optimized:
            out_size = T.cast(
                T.ceil((T.shape(Xr)[-1] - T.shape(net.varWeights[layerNum]['w'])[-1] + 1) / np.float32(self.stride)),
                'int32')

            conv_op = FilterActs(stride=self.stride)
            input_shuffled = Xr.dimshuffle(1, 2, 3, 0)  # bc01 to c01b
            filters_shuffled = net.varWeights[layerNum]['w'].dimshuffle(1, 2, 3, 0)  # bc01 to c01b
            filters_flipped = filters_shuffled[:, ::-1, ::-1, :] # flip rows and columns
            contiguous_input = gpu_contiguous(input_shuffled)
            contiguous_filters = gpu_contiguous(filters_flipped *
                                                (net.dropOutVectors[layerNum].dimshuffle('x', 0, 1, 'x') if self.dropout else 1.0))
            a = conv_op(contiguous_input, contiguous_filters)
            a = a[:, :out_size, :out_size, :]
            #Add bias
            a = a + net.varWeights[layerNum]['b'].dimshuffle(0, 'x', 'x', 'x')
        else:
            a = T.nnet.conv2d(Xr, net.varWeights[layerNum]['w'] *
                              (net.dropOutVectors[layerNum].dimshuffle('x', 'x', 0, 1) if self.dropout else 1.0),
                              border_mode='valid',
                              subsample=(self.stride, self.stride))
            #Add bias
            a = a + net.varWeights[layerNum]['b'].dimshuffle('x', 0, 'x', 'x')

        if self.pooling:
            if self.optimized:
                #Pooling
                # ds - side of square pool window
                # stride - Defines the stride size between successive pooling squares.
                # Setting this parameter smaller than sizeX produces overlapping pools.
                # Setting it equal to sizeX gives the usual, non-overlapping pools. Values greater than sizeX are not allowed.
                pool_op = MaxPool(ds=self.pooling_shape, stride=self.pooling_shape)

                contiguous_input = gpu_contiguous(a)
                a = pool_op(contiguous_input)
                a = a.dimshuffle(3, 0, 1, 2)       # c01b to bc01
            else:
                a = downsample.max_pool_2d(a, (self.pooling_shape, self.pooling_shape), ignore_border=False)
        else:
            if self.optimized:
                a = a.dimshuffle(3, 0, 1, 2)       # c01b to bc01

        a = T.flatten(a, outdim=2).T

        #Sigmoid
        a = self.activation(a, self.pool_size)

        net.varArrayA.append(a)
Example #22
 def __init__(self, p, *args, **kwargs):
     super().__init__(*args, **kwargs)
     try:
         self.k = tt.shape(p)[-1].tag.test_value
     except AttributeError:
         self.k = tt.shape(p)[-1]
     p = tt.as_tensor_variable(floatX(p))
     self.p = (p.T / tt.sum(p, -1)).T
     self.mode = tt.argmax(p)
Example #23
 def __init__(self, p, *args, **kwargs):
     super(BinaryCrossEntropyLikelihood, self).__init__(*args, **kwargs)
     self.loss_func = categorical_hinge
     try:
         self.k = tt.shape(p)[-1].tag.test_value
     except AttributeError:
         self.k = tt.shape(p)[-1]
     self.p = tt.as_tensor_variable(p)
     self.mode = tt.argmax(p)
Example #24
 def __init__(self, p, *args, **kwargs):
     super(Categorical, self).__init__(*args, **kwargs)
     try:
         self.k = tt.shape(p)[-1].tag.test_value
     except AttributeError:
         self.k = tt.shape(p)[-1]
     self.p = p = tt.as_tensor_variable(p)
     self.p = (p.T / tt.sum(p, -1)).T
     self.mode = tt.argmax(p)
Example #25
    def activation(self,z):
        
        y = T.reshape(z,(T.shape(z)[0], self.n_units//self.n_pieces, self.n_pieces))

        y = T.max(y,axis=2)
        
        y = T.reshape(y,(T.shape(z)[0],self.n_units//self.n_pieces))

        return y
Example #26
def conv2D_keep_shape(x, w, image_shape, filter_shape, subsample=(1, 1)):
    # crop output to same size as input
    fs = T.shape(w)[2] - 1  # this is the filter size minus 1
    ims = T.shape(x)[2]     # this is the image size
    return theano.sandbox.cuda.dnn.dnn_conv(img=x,
                                            kerns=w,
                                            border_mode='full',
                                            subsample=subsample,
                                            )[:, :, fs/2:ims+fs/2, fs/2:ims+fs/2]
Example #27
def get_train(U_Ot, U_R, lenW, n_facts):
    def phi_x1(x_t, L):
        return T.concatenate([L[x_t].reshape((-1,)), zeros((2*lenW,)), zeros((3,))], axis=0)
    def phi_x2(x_t, L):
        return T.concatenate([zeros((lenW,)), L[x_t].reshape((-1,)), zeros((lenW,)), zeros((3,))], axis=0)
    def phi_y(x_t, L):
        return T.concatenate([zeros((2*lenW,)), L[x_t].reshape((-1,)), zeros((3,))], axis=0)
    def phi_t(x_t, y_t, yp_t, L):
        return T.concatenate([zeros(3*lenW,), T.stack(T.switch(T.lt(x_t,y_t), 1, 0), T.switch(T.lt(x_t,yp_t), 1, 0), T.switch(T.lt(y_t,yp_t), 1, 0))], axis=0)
    def s_Ot(xs, y_t, yp_t, L):
        result, updates = theano.scan(
            lambda x_t, t: T.dot(T.dot(T.switch(T.eq(t, 0), phi_x1(x_t, L).reshape((1,-1)), phi_x2(x_t, L).reshape((1,-1))), U_Ot.T),
                           T.dot(U_Ot, (phi_y(y_t, L) - phi_y(yp_t, L) + phi_t(x_t, y_t, yp_t, L)))),
            sequences=[xs, T.arange(T.shape(xs)[0])])
        return result.sum()
    def sR(xs, y_t, L, V):
        result, updates = theano.scan(
            lambda x_t, t: T.dot(T.dot(T.switch(T.eq(t, 0), phi_x1(x_t, L).reshape((1,-1)), phi_x2(x_t, L).reshape((1,-1))), U_R.T),
                                 T.dot(U_R, phi_y(y_t, V))),
            sequences=[xs, T.arange(T.shape(xs)[0])])
        return result.sum()

    x_t = T.iscalar('x_t')
    m = [x_t] + [T.iscalar('m_o%d' % i) for i in xrange(n_facts)]
    f = [T.iscalar('f%d_t' % i) for i in xrange(n_facts)]
    r_t = T.iscalar('r_t')
    gamma = T.scalar('gamma')
    L = T.fmatrix('L') # list of messages
    V = T.fmatrix('V') # vocab
    r_args = T.stack(*m)

    cost_arr = [0] * 2 * (len(m)-1)
    updates_arr = [0] * 2 * (len(m)-1)
    for i in xrange(len(m)-1):
        cost_arr[2*i], updates_arr[2*i] = theano.scan(
                lambda f_bar, t: T.switch(T.or_(T.eq(t, f[i]), T.eq(t, T.shape(L)-1)), 0, T.largest(gamma - s_Ot(T.stack(*m[:i+1]), f[i], t, L), 0)),
            sequences=[L, T.arange(T.shape(L)[0])])
        cost_arr[2*i+1], updates_arr[2*i+1] = theano.scan(
                lambda f_bar, t: T.switch(T.or_(T.eq(t, f[i]), T.eq(t, T.shape(L)-1)), 0, T.largest(gamma + s_Ot(T.stack(*m[:i+1]), t, f[i], L), 0)),
            sequences=[L, T.arange(T.shape(L)[0])])

    cost1, u1 = theano.scan(
        lambda r_bar, t: T.switch(T.eq(r_t, t), 0, T.largest(gamma - sR(r_args, r_t, L, V) + sR(r_args, t, L, V), 0)),
        sequences=[V, T.arange(T.shape(V)[0])])

    cost = cost1.sum()
    for c in cost_arr:
        cost += c.sum()

    g_uo, g_ur = T.grad(cost, [U_Ot, U_R])

    train = theano.function(
        inputs=[r_t, gamma, L, V] + m + f,
        outputs=[cost],
        updates=[(U_Ot, U_Ot-alpha*g_uo), (U_R, U_R-alpha*g_ur)])
    return train
Example #28
def mapping(true, parameters):

    NT = T.shape(true)[0]
    D = T.shape(true)[1]
    M = T.shape(parameters)[1] // (D + D**2 + 1)

    means = parameters[:, :D * M].reshape((NT, M, D))
    sigmas = parameters[:, D * M:D * M + M * D * D].reshape((NT, M, D, D))
    weights = T.nnet.softmax(parameters[:, D * M + M * D * D:])
    return means, sigmas, weights
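A NumPy sketch (illustrative) of the packing assumed by mapping(): each parameter row holds D*M means, M*D*D covariance-factor entries and M mixture weights, i.e. M*(D + D**2 + 1) values in total.

import numpy as np

NT, D, M = 4, 2, 3
parameters = np.random.randn(NT, M * (D + D**2 + 1))                   # 4 x 21
means = parameters[:, :D * M].reshape(NT, M, D)                        # 4 x 3 x 2
sigmas = parameters[:, D * M:D * M + M * D * D].reshape(NT, M, D, D)   # 4 x 3 x 2 x 2
weights = parameters[:, D * M + M * D * D:]                            # softmax-ed in the Theano code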
Example #29
            def GRU(i, U, W, b, x_0, s_prev):
                b1 = T.specify_shape((coversion_ones*b[i * 3,:]).T, T.shape(x_0))
                b2 = T.specify_shape((coversion_ones*b[i * 3 + 1 ,:]).T, T.shape(x_0))
                b3 = T.specify_shape((coversion_ones*b[i * 3 + 2,:]).T, T.shape(x_0))

                z = T.nnet.hard_sigmoid(U[i * 3 + 0].dot(x_0) + W[i * 3 + 0].dot(s_prev) + b1)
                r = T.nnet.hard_sigmoid(U[i * 3 + 1].dot(x_0) + W[i * 3 + 1].dot(s_prev) + b2)
                c = T.tanh(U[i * 3 + 2].dot(x_0) + W[i * 3 + 2].dot(s_prev * r) + b3)

                return (T.ones_like(z) - z) * c + z * s_prev
Example #30
 def __init__(self, input1, input2):
     x1_sub = input1[:, :, 2:-2, 2:-2]
     x1_flatten = T.flatten(x1_sub)
     x1 = T.extra_ops.repeat(x1_flatten, 25)
     x1 = T.reshape(x1, [T.shape(x1_flatten)[0], 25])
     x2 = neighbours.images2neibs(input2, neib_shape=(5, 5), neib_step=(1, 1))
     diff = x1 - x2
     new_shape = T.shape(x1_sub)*[1, 1, 5, 5]
     diff_img = neighbours.neibs2images(diff, neib_shape=(5, 5), original_shape=[1, 25, 25*5, 5*5])
     self.output = T.nnet.relu(diff_img)
Example #31
    def activation(self, z):

        y = T.reshape(z, (T.shape(z)[0], self.n_units, self.n_pieces))

        # maxout
        y = T.max(y, axis=2)

        y = T.reshape(y, (T.shape(z)[0], self.n_units))

        return y
Example #32
def conv2D_keep_shape(x, w, image_shape, filter_shape, subsample=(1, 1)):
    # crop output to same size as input
    fs = T.shape(w)[2] - 1  # this is the filter size minus 1
    ims = T.shape(x)[2]  # this is the image size
  #  return theano.sandbox.cuda.dnn.dnn_conv(img=x, kerns=w,
    return theano.tensor.nnet.conv2d(x, w,
                                     image_shape=image_shape, filter_shape=filter_shape,
                                     border_mode='full',
                                     subsample=subsample,
                                     )[:, :, fs/2:ims+fs/2, fs/2:ims+fs/2]
Example #33
    def get_cost_updates(self):

        self.lr_D = T.scalar('lrD')
        self.lr_G = T.scalar('lrG')

        self.discriminator_cost = self.discriminator.logRegressionLayer.negative_log_likelihood(
        )
        discriminator_params = [self.W, self.b] + self.discriminator.params
        g_D = [
            self.lr_D * T.grad(self.discriminator_cost, param)
            for param in discriminator_params
        ]

        self.classification_error = T.mean(
            T.neq(self.discriminator.logRegressionLayer.y_pred,
                  self.y_all_input)[:T.shape(self.X_input)[0]])
        self.gen_classification_error = T.mean(
            T.neq(self.discriminator.logRegressionLayer.y_pred,
                  self.y_all_input)[T.shape(self.X_input)[0]:])
        self.discrimination_error = T.mean(
            T.neq(
                self.discriminator.logRegressionLayer.y_pred //
                self.num_of_corpus, self.y_all_input // self.num_of_corpus))

        self.cost_per_gen, updates = theano.scan(
            fn=lambda p, y: T.log(p[y] / (p[y] + p[y - self.num_of_corpus])),
            outputs_info=None,
            sequences=[
                self.discriminator.logRegressionLayer.
                p_y_given_x[T.shape(self.X_input)[0]:], self.g_label_input
            ],
            non_sequences=None)
        self.generator_cost = T.mean(self.cost_per_gen)
        g_G = [
            self.lr_G * T.grad(self.generator_cost, param)
            for param in self.generator.params
        ]

        params = discriminator_params + self.generator.params
        gparams = g_D + g_G
        self.updates = updates + [(param, T.cast(param - gparam, 'float32'))
                                  for param, gparam in zip(params, gparams)]

        self.lr_C = T.scalar('lrC')
        self.classifier_cost = self.classifier.logRegressionLayer.negative_log_likelihood(
        )
        self.classifier_error = self.classifier.logRegressionLayer.errors()
        classifier_params = [self.W, self.b] + self.classifier.params
        g_C = [
            self.lr_C * T.grad(
                self.classifier.logRegressionLayer.negative_log_likelihood(),
                param) for param in classifier_params
        ]
        self.c_updates = [(param, T.cast(param - gparam, 'float32'))
                          for param, gparam in zip(classifier_params, g_C)]
Example #34
    def get_output_for(self, inputs, **kwargs):
        # For each ROI R = [batch_index x1 y1 x2 y2]: max pool over R
        input = inputs[0]
        boxes = inputs[1]
        batch = T.shape(input)[0]
        channels = T.shape(input)[1]
        height = T.shape(input)[2]
        width = T.shape(input)[3]
        num_boxes = T.shape(boxes)[0]
        output = T.zeros((batch * num_boxes, channels, self.num_features))

        for idbb, bb in enumerate(range(num_boxes)):
            batch_ind = bb[0]

            pool_list = []
            #for pool_dim in self.pool_dims:
            start_w = T.clip(T.floor(bb[1] * self.sp_scale), 0, width)
            start_h = T.clip(T.floor(bb[2] * self.sp_scale), 0, height)
            end_w = T.clip(T.ceil(bb[3] * self.sp_scale), 0, width)
            end_h = T.clip(T.ceil(bb[4] * self.sp_scale), 0, height)

            w = T.maximum(end_w - start_w + 1, 1)
            h = T.maximum(end_h - start_h + 1, 1)

            start_samples_y, start_sample_x = T.floor(
                _meshgrid(start_h, end_h, pool_dims + 1, start_w, end_w,
                          pool_dims + 1))
            end_samples_y, end_sample_x = T.ceil(
                _meshgrid(start_h, end_h, pool_dims + 1, start_w, end_w,
                          pool_dims + 1))

            input[batch_ind, :,
                  np.floor(py):np.ceil(samples_y[idy + 1]),
                  np.floor(px):np.ceil(samples_x[idx + 1])]

            #T.max()

            #for idx,px in enumerate(samples_x[:-1]):
            #    for idy,py in enumerate(samples_y[:-1]):

            #       (pool.dnn_pool( input[batch_ind,:,np.floor(py):np.ceil(samples_y[idy+1]),np.floor(px):np.ceil(samples_x[idx+1])],(0,0),(None,None),'max', (0,0) )).flatten(2)

            #sz_w = ( w - 1 ) // pool_dim
            #sz_h = ( h - 1 ) // pool_dim

            #str_h = w // pool_dim
            #str_w = h // pool_dim

            #pool = dnn.dnn_pool( input[bb[0],:,start_h:end_h+1,start_w:end_w+1], (sz_h,sz_w),                 (str_h,str_w), 'max', (0,0) ).flatten(2)
        pool_list.append(pool)
        output[idbb] = T.transpose(T.concatenate(
            pool_list, axis=1))  #not efficient but for the moment is ok!
        #if everything is correct this vector should be ordered as in fast RCNN
        return output
Example #35
 def down_sampleT(self, x, y, _sample_rate):
     length = tensor.cast(tensor.shape(y)[0] * _sample_rate, 'int32')
     id_max = tensor.cast(tensor.shape(y)[0] - 1, 'int32')
     def get_sub(i,x,y):
         idd = self.srng.random_integers(low = 0, high = id_max)
         return [x[idd], y[idd]]
     ([dx, dy], updates) = theano.scan(fn = get_sub,
             outputs_info=None,
             sequences=tensor.arange(length),
             non_sequences=[x,y])
     return dx, dy, length
Example #36
            def GRU(i, U, W, b, x_0, s_previous):
                U_copy, W_copy = U, W
                b1 = T.specify_shape((coversion_ones * b[i * 3, :]).T, T.shape(x_0))
                b2 = T.specify_shape((coversion_ones * b[i * 3 + 1, :]).T, T.shape(x_0))
                b3 = T.specify_shape((coversion_ones * b[i * 3 + 2, :]).T, T.shape(x_0))

                z = T.nnet.hard_sigmoid(U_copy[i * 3 + 0].dot(x_0) + W_copy[i * 3 + 0].dot(s_previous) + b1)
                r = T.nnet.hard_sigmoid(U_copy[i * 3 + 1].dot(x_0) + W_copy[i * 3 + 1].dot(s_previous) + b2)
                s_candidate = T.tanh(U_copy[i * 3 + 2].dot(x_0) + W_copy[i * 3 + 2].dot(s_previous * r) + b3)

                return (T.ones_like(z) - z) * s_candidate + z * s_previous
Example #37
 def __init__(self, loss_func, p, *args, **kwargs):
     super(LogLikelihood, self).__init__(*args, **kwargs)
     if loss_func is None:
         loss_func = categorical_crossentropy
     self.loss_func = loss_func
     try:
         self.k = tt.shape(p)[-1].tag.test_value
     except AttributeError:
         self.k = tt.shape(p)[-1]
     self.p = tt.as_tensor_variable(p)
     self.mode = tt.argmax(p)
Example #38
def apt_maf_loss_atomic_proposal(net, svi=False, combined_loss=False):
    """Define loss function for training with an atomic proposal. Assumes a
    uniform proposal distribution over each sample parameter and an externally
    provided set of alternatives.

    net: MAF-based conditional density net
    svi : bool
        Whether to use SVI version of the mdn or not
    """
    assert net.density == 'maf'
    assert not svi, 'SVI not supported for MAFs'

    # define symbolic variable to hold params that will be inferred
    # params : n_batch x  n_outputs
    # all_thetas : (n_batch * (n_atoms + 1)) x n_outputs
    # lprs  : (n_atoms + 1) x n_batch
    # stats :  n_batch x  n_inputs
    # x_nl  : (n_batch * (n_atoms + 1)) x n_inputs
    theta_all = tensorN(2, name='params_nl', dtype=dtype)
    x_nl = tensorN(2, name='stats_nl', dtype=dtype)
    lprs = tensorN(2, name='lprs', dtype=dtype)  # log tilde_p / p

    n_batch = tt.shape(lprs)[1]
    n_atoms = tt.shape(lprs)[0] - 1

    # compute MAF log-densities for true and other atoms
    lprobs = theano.clone(output=net.lprobs,
                          replace={
                              net.params: theta_all,
                              net.stats: x_nl
                          },
                          share_inputs=True)
    lprobs = tt.reshape(lprobs, newshape=(n_atoms + 1, n_batch), ndim=2)

    # compute nonnormalized log posterior probabilities
    atomic_ppZ = lprobs - lprs
    # compute posterior probability of true params in atomic task
    atomic_pp = atomic_ppZ[0, :].squeeze() - \
        MyLogSumExp(atomic_ppZ, axis=0).squeeze()

    # collect the extra input variables that have to be provided for each
    # training data point, and calculate the loss by averaging over samples
    trn_inputs = [theta_all, x_nl, lprs]
    if combined_loss:  # add prior loss on prior samples
        l_ml = lprobs[0, :].squeeze()  # direct posterior evaluation
        is_prior_sample = tensorN(1, name='prop_mask', dtype=dtype)
        trn_inputs.append(is_prior_sample)
        loss = -tt.mean(atomic_pp + is_prior_sample * l_ml)
    else:
        loss = -tt.mean(atomic_pp)

    return loss, trn_inputs
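A NumPy sketch of the normalization above (assuming MyLogSumExp is a log-sum-exp over axis 0): the true parameter sits in row 0, and its atomic posterior log-probability is its corrected log-density minus the log-sum-exp over all atoms.

import numpy as np

lprobs = np.array([[-1.0, -0.3],                   # (n_atoms + 1) x n_batch log densities
                   [-2.0, -1.5],
                   [-0.5, -2.0]])
lprs = np.zeros_like(lprobs)                       # log tilde_p / p corrections
z = lprobs - lprs
atomic_pp = z[0] - np.log(np.exp(z).sum(axis=0))   # log posterior weight of the true atom
loss = -atomic_pp.mean()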
Example #39
 def get_output_for(self, inputs, **kwargs):
     # For each ROI R = [batch_index x1 y1 x2 y2]: max pool over R
     input = inputs[0]
     boxes = inputs[1]
     batch = T.shape(input)[0]
     channels = T.shape(input)[1]
     height = T.shape(input)[2]
     width = T.shape(input)[3]
     num_boxes = T.shape(boxes)[0]
     #output = T.zeros((batch * num_boxes , channels, self.num_features))
     op = ROIPoolingOp(pooled_h=self.pool_dims, pooled_w=self.pool_dims, spatial_scale=self.sp_scale)
     output = op(input, boxes)
     return output[0]
Example #40
    def __init__(self, p, *args, **kwargs):
        super().__init__(*args, **kwargs)
        try:
            self.k = tt.shape(p)[-1].tag.test_value
        except AttributeError:
            self.k = tt.shape(p)[-1]
        p = tt.as_tensor_variable(floatX(p))

        # From #2082, it may be dangerous to automatically rescale p at this
        # point without checking for positiveness
        self.p = p
        self.mode = tt.argmax(p, axis=-1)
        if self.mode.ndim == 1:
            self.mode = tt.squeeze(self.mode)
Example #41
    def grad(self, inputs, cost_grad):
        """
        Notes:
        1. The gradient is computed under the assumption that perturbations
        of the input array respect triangularity, i.e. partial derivatives wrt
        triangular region are zero.
        2. In contrast with the usual mathematical presentation, in order to
        apply theano's 'reshape' function which implements row-order (i.e. C
        order), the differential expressions below have been derived based on
        the row-vectorizations of inputs 'a' and 'b'.

        See The Matrix Reference Manual,
        Copyright 1998-2011 Mike Brookes, Imperial College, London, UK
        """

        a, b = inputs
        ingrad = cost_grad
        ingrad = tensor.as_tensor_variable(ingrad)
        shp_a = (tensor.shape(inputs[0])[1],
                 tensor.shape(inputs[0])[1])
        I_M = tensor.eye(*shp_a)
        if self.lower:
            inv_a = solve_triangular(a, I_M, lower=True)
            tri_M = tril(tensor.ones(shp_a))
        else:
            inv_a = solve_triangular(a, I_M, lower=False)
            tri_M = triu(tensor.ones(shp_a))
        if b.ndim == 1:
            prod_a_b = tensor.tensordot(-b.T, inv_a.T, axes=1)
            prod_a_b = tensor.shape_padleft(prod_a_b)
            jac_veca = kron(inv_a, prod_a_b)
            jac_b = inv_a
            outgrad_veca = tensor.tensordot(ingrad, jac_veca, axes=1)
            outgrad_a = tensor.reshape(outgrad_veca,
                        (inputs[0].shape[0], inputs[0].shape[0])) * tri_M
            outgrad_b = tensor.tensordot(ingrad, jac_b, axes=1).flatten(ndim=1)
        else:
            ingrad_vec = ingrad.flatten(ndim=1)
            prod_a_b = tensor.tensordot(-b.T, inv_a.T, axes=1)
            jac_veca = kron(inv_a, prod_a_b)
            I_N = tensor.eye(tensor.shape(inputs[1])[1],
                             tensor.shape(inputs[1])[1])
            jac_vecb = kron(inv_a, I_N)
            outgrad_veca = tensor.tensordot(ingrad_vec, jac_veca, axes=1)
            outgrad_a = tensor.reshape(outgrad_veca,
                        (inputs[0].shape[0], inputs[0].shape[0])) * tri_M
            outgrad_vecb = tensor.tensordot(ingrad_vec, jac_vecb, axes=1)
            outgrad_b = tensor.reshape(outgrad_vecb,
                        (inputs[1].shape[0], inputs[1].shape[1]))
        return [outgrad_a, outgrad_b]
Example #42
    def one_hot_crossentropy(y_true, y_pred):

        # use Keras`s code to prevent nan, inf
        if theano.config.floatX == "float64":
            epsilon = 1.0e-9
        else:
            epsilon = 1.0e-7

        # clip the values between 0 and 1
        # (in fact, softmax already keeps values in 0 ~ 1, so this is probably
        # not needed; it is kept to guard against unexpected nan/inf)
        y_pred = T.clip(y_pred, epsilon, 1.0 - epsilon)
        # scale preds so that the class probas of each sample sum to 1
        y_pred /= y_pred.sum(axis=-1, keepdims=True)

        # vocabulary size
        voca_size = T.shape(y_pred)[-1]

        # convert to 1D array for indexing
        y_pred = y_pred.flatten()
        y_true = y_true.flatten().astype("int32")

        # change y_true`s word vector index to fit 1D array
        ix = T.arange(y_true.size) * voca_size + y_true

        # indexing instead of summation
        cce = -T.log(y_pred[ix])

        return cce
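A NumPy sketch of the flatten-and-index trick above: for a (batch, vocab) prediction matrix, row i's probability of class y_true[i] sits at flat index i * voca_size + y_true[i].

import numpy as np

y_pred = np.array([[0.1, 0.7, 0.2],
                   [0.5, 0.3, 0.2]])
y_true = np.array([1, 0])
voca_size = y_pred.shape[-1]
ix = np.arange(y_true.size) * voca_size + y_true
cce = -np.log(y_pred.flatten()[ix])     # picks out 0.7 and 0.5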
Example #43
def build_model():
    # 1. Input layer
    l_in = lasagne.layers.InputLayer(shape=(None, seq_len, n_inputs))
    batchsize, _, _ = T.shape(l_in.input_var)
    # 2. First Dense Layer    
    l_reshape_a = lasagne.layers.ReshapeLayer(
        l_in, (batchsize*seq_len,n_inputs))
    l_1_batchnorm = batchnormlayer(l=l_reshape_a, num_units=N_L1, nonlinearity=lasagne.nonlinearities.rectify)
    l_reshape_b = lasagne.layers.ReshapeLayer(
        l_1_batchnorm, (batchsize, seq_len, N_L1))
    # 3. LSTM Layers
    l_forward = lasagne.layers.LSTMLayer(l_reshape_b, N_LSTM_F)
    l_backward = lasagne.layers.LSTMLayer(l_reshape_b, N_LSTM_B, backwards=True)
    #Concat layer
    l_sum = lasagne.layers.ConcatLayer(incomings=[l_forward, l_backward], axis=2)
    # 4. Second Dense Layer
    l_reshape_c = lasagne.layers.ReshapeLayer(
        l_sum, (batchsize*seq_len, N_LSTM_F+N_LSTM_B))
    l_2_batchnorm = batchnormlayer(l=l_reshape_c, num_units=N_L2, nonlinearity=lasagne.nonlinearities.rectify)
    # 5. Output Layer
    l_recurrent_out = lasagne.layers.DenseLayer(
        l_2_batchnorm, num_units=num_classes, nonlinearity=lasagne.nonlinearities.softmax)

    # Now, reshape the output back to the RNN format
    l_out = lasagne.layers.ReshapeLayer(
        l_recurrent_out, (batchsize, seq_len, num_classes))

    return l_in, l_out
Example #44
 def dropout_fprop(self, input):
     
     # we reduce the precision of parameters for the computations
     self.fixed_W = apply_format(self.format, self.W, self.comp_precision, self.w_range)
     self.fixed_b = apply_format(self.format, self.b, self.comp_precision, self.b_range)
         
     # create the dropout mask
     # The cast is important because
     # int * float32 = float64 which pulls things off the gpu
     srng = T.shared_randomstreams.RandomStreams(self.rng.randint(999999))
     self.mask = T.cast(srng.binomial(n=1, p=self.p, size=T.shape(input)), theano.config.floatX)
     
     # apply the mask
     self.fixed_x = input * self.mask
     
     # weighted sum
     self.z = T.dot(self.fixed_x, self.fixed_W) + self.fixed_b
     self.fixed_z = apply_format(self.format, self.z, self.comp_precision, self.z_range)
     
     # activation
     self.y = self.activation(self.fixed_z)
     self.fixed_y = apply_format(self.format, self.y, self.comp_precision, self.y_range)
     
     # return the output
     return  self.fixed_y
Example #45
 def logit_softmax_fn(logits):
     axis_last = logits.dimshuffle(range(axis) + range(axis + 1, logits.ndim) + [axis])
     logits_flattened = T.reshape(axis_last, (-1, T.shape(axis_last)[-1]))
     logits_shifted = logits_flattened - logits_flattened.max(axis=1, keepdims=True)
     logits_normalized = (logits_shifted -
                          T.log(T.sum(T.exp(logits_shifted), axis=1, keepdims=True)))
     return T.reshape(logits_normalized, logits.shape)
Example #46
		def timestep(predictions, label, len_example, total_len_example):

			label_binary = T.gt(label[0:len_example-1], 0)
			oov_count = T.shape(label_binary)[0] - T.sum(label_binary)
			
			a = total_len_example
			return T.sum(T.log( 1./ predictions[T.arange(len_example-1), label[0:len_example-1]]) * label_binary ), oov_count
Example #47
 def MaxOut(z, *args):
     #z = T.dot(W, X) + B.dimshuffle(0, 'x')
     d = T.shape(z)
     n_elem = args[0]
     z = z.reshape((d[0] / n_elem, n_elem, d[1]))
     a = T.max(z, axis=1)
     return a
Example #48
def add_normal(model, name, m, v):
    new_var = model['stream'].normal(avg=m, std=T.sqrt(v))
    new_factors = [ require(T.gt(v,0)),
                    T.log(2*np.pi), 
                    -T.prod(T.shape(new_var))*T.log(v)/2,
                    -(new_var-m)**2/2/v]
    return add_stochastic(model, name, new_var, new_factors)
Example #49
def cross_entropy_cost(target, output, output_act, in_sided, out_sided, in_bounded, out_bounded, act):
    assert in_bounded
    #assert out_bounded
    scale_bb = 1.
    if in_bounded != 1.:
        target = target / in_bounded
    #if out_bounded != 1.:
    #    output = output / out_bounded
    #    scale_bb = 1. / out_bounded
    if not in_sided:
        target = (target+1)/(2.0)
    if not out_sided:
        output= (output+1)/(2.0)
        scale_bb = scale_bb / 2.
    ddXE = target * scale_bb * 1./(output * output) + (1 - target) * scale_bb * 1./((1 - output) * (1-output))
    ddXE /= T.shape(ddXE)[0]
    ddXE = T.cast(ddXE,dtype=theano.config.floatX)
    if act in ['sigmoid','tanh','tanhnorm','abstanh','abstanhnorm']:
        if act == 'sigmoid':
            return sigmoid_cross_entropy(target, output_act,ddXE)
        if act == 'tanh':
            return tanh_cross_entropy(target, output_act,ddXE)
        if act == 'tanhnorm':
            return tanhnorm_cross_entropy(target, output_act,ddXE)
        if act == 'abstanh':
            return abstanh_cross_entropy(target, output_act,ddXE)
        if act == 'abstanhnorm':
            return abstanhnorm_cross_entropy(target, output_act,ddXE)
    else:
        XE = target * T.log(output) + (1 - target) * T.log(1 - output)
        return [[-T.mean(T.sum(XE, axis=1),axis=0)] , ddXE]
Example #50
    def init_param_updates(self, layer, parameter):
        epoch = self.variables.epoch
        step = self.variables.step
        beta1 = self.beta1
        beta2 = self.beta2

        parameter_shape = T.shape(parameter).eval()
        prev_first_moment = theano.shared(
            name="{}/prev-first-moment".format(parameter.name),
            value=asfloat(np.zeros(parameter_shape)),
        )
        prev_weighted_inf_norm = theano.shared(
            name="{}/prev-weighted-inf-norm".format(parameter.name),
            value=asfloat(np.zeros(parameter_shape)),
        )

        gradient = T.grad(self.variables.error_func, wrt=parameter)

        first_moment = beta1 * prev_first_moment + (1 - beta1) * gradient
        weighted_inf_norm = T.maximum(beta2 * prev_weighted_inf_norm,
                                      T.abs_(gradient))

        parameter_delta = (
            (1 / (1 - beta1 ** epoch)) *
            (first_moment / (weighted_inf_norm + self.epsilon))
        )

        return [
            (prev_first_moment, first_moment),
            (prev_weighted_inf_norm, weighted_inf_norm),
            (parameter, parameter - step * parameter_delta),
        ]
Example #51
def binarization(W,H,binary=True,deterministic=False,stochastic=False,srng=None):
    
    # (deterministic == True) <-> test-time <-> inference-time
    if not binary or (deterministic and stochastic):
        # print("not binary")
        Wb = W
    
    else:
        
        # [-1,1] -> [0,1]
        Wb = hard_sigmoid(W/H)
        # Wb = T.clip(W/H,-1,1)
        
        # Stochastic BinaryConnect
        if stochastic:
        
            # print("stoch")
            Wb = T.cast(srng.binomial(n=1, p=Wb, size=T.shape(Wb)), theano.config.floatX)

        # Deterministic BinaryConnect (round to nearest)
        else:
            # print("det")
            Wb = T.round(Wb)
        
        # 0 or 1 -> -1 or 1
        Wb = T.cast(T.switch(Wb,H,-H), theano.config.floatX)
    
    return Wb
Example #52
	def predict(self, input):   #input is an array of vectors (2D np.array)
		self.input = input
		padw = int(self.window/2)
		if padw>0:
			padding = np.asarray([np.zeros((self.dim_in,), dtype=theano.config.floatX)] * (padw))
			inp = T.concatenate((padding, input, padding), axis=0)
		else:
			inp = self.input
		seq = T.arange(T.shape(inp)[0]-self.window+1)
		self.input, _ = theano.scan(lambda v: inp[v : v+self.window].flatten(), sequences=seq)

		# initialize the gates
		out = theano.shared(numpy.zeros((self.dim_out,), dtype=theano.config.floatX))

		# gate computations
		def rnn_step(x, h_prev):
			if self.use_bias:
				out = T.nnet.sigmoid(T.dot(x, self.Wx) + T.dot(h_prev, self.Wh) + self.b)
			else:
				out = T.nnet.sigmoid(T.dot(x, self.Wx) + T.dot(h_prev, self.Wh))
			return out

		self.output, _ = theano.scan(fn=rnn_step, 
								  sequences = dict(input=self.input, taps=[0]), 
								  outputs_info = [out])
		if self.use_last_output:
			self.output = self.output[-1]
		if self.pooling != None:
			self.output = self.pooling(self.output)
		return self.output
Example #53
    def get_cost_updates(self, corruption_level, learning_rate,cost_function_name):
        """
            This function computes the cost and the updates for one training
            step of the dA
        """
        #print str(self.activation)
        tilde_x = self.get_corrupted_input(self.x, corruption_level)
        #print self.activation
        y = self.get_hidden_values(tilde_x)
        z = self.get_reconstructed_input(y)
        cost=None
        
        if cost_function_name=='cross_entropy':
            #print 'cross_entropy..'
            L = - T.sum(self.x * T.log(z) + (1 - self.x) * T.log(1-z), axis=1)
            cost = T.mean(L) # sum over all nodes, then average over the batch_size
        if cost_function_name=='sqr_error':
            L=(T.sum(T.square(T.abs_(self.x-z))/2.,axis=0))/T.cast(T.shape(self.x)[0],'float32')
            #theano.printing.debugprint(obj=cost,print_type=True)
            #printdebug.debugprint(cost)
            cost=T.mean(L)
            
        T.cast(cost, 'float32')
        #print cost
        # compute the gradients of the cost of the `dA` with respect
        # to its parameters
        gparams = T.grad(cost, self.params)
        # generate the list of updates
        updates = [
            (param, param - learning_rate * gparam)
            for param, gparam in zip(self.params, gparams)
        ]

        return (cost, updates)
Example #54
 def bbprop(self):
     self.lin_bbprop = self.p_y_given_x - self.p_y_given_x * self.p_y_given_x
     self.lin_bbprop /= T.shape(self.p_y_given_x)[0]
     self.dict_bbprop = {}
     self.dict_bbprop.update({self.b_upmask: T.sum(self.lin_bbprop, 0)})
     self.dict_bbprop.update({self.W_upmask: T.dot(T.transpose(self.inp * self.inp), self.lin_bbprop)})
     return T.dot(self.lin_bbprop, T.transpose(self.W * self.W)), self.dict_bbprop
Example #55
def hessian(objective, argument):
    """
    Compute the directional derivative of the gradient
    (which is equal to the hessian multiplied by direction).
    """
    g = T.grad(objective, argument)

    # Create a new tensor A, which has the same type (i.e. same dimensionality)
    # as argument.
    A = argument.type()

    try:
        # First attempt efficient 'R-op', this directly calculates the
        # directional derivative of the gradient, rather than explicitly
        # calculating the hessian and then multiplying.
        R = T.Rop(g, argument, A)
    except NotImplementedError:
        shp = T.shape(argument)
        H = T.jacobian(g.flatten(), argument).reshape(
                                        T.concatenate([shp, shp]), 2*A.ndim)
        R = T.tensordot(H, A, A.ndim)

    try:
        hess = theano.function([argument, A], R, on_unused_input='raise')
    except theano.compile.UnusedInputError:
        warn('Theano detected unused input - suggests hessian may be zero or '
             'constant.')
        hess = theano.function([argument, A], R, on_unused_input='ignore')
    return hess
Example #56
def jacobian_mul_vector_l_flat(y, x, W, v, x_val, W_val, v_val):
    J = theano.gradient.jacobian(y, x)
    J_flat = T.flatten(J, J.ndim - 1) # The jacobian result on flattened matrix x
    VJ = v.dot(J_flat)
    VJ_reshape = T.reshape(VJ, T.shape(x))
    f_VJ = theano.function([x, W, v], VJ_reshape)
    return f_VJ(x_val, W_val, v_val)
Example #57
    def forward(self, ec_H, ec_C, mask):
        (sens_size, batch_size) = T.shape(mask)

        def step(m, prev_Y, prev_H, prev_C):
            """Forward a time step of the decoder."""
            # LSTM forward time step
            (H, C) = self.lstm.step(prev_Y, m, prev_H, prev_C)
            # LSTM output
            O = self.lstm_output.forward(H)
            # Apply softmax to LSTM output
            P = self.softmax.forward(O)
            # Make prediction
            one_hot_Y = T.argmax(P, axis=1)
            # Feed the output to the next time step
            Y = self.embedding.forward(one_hot_Y)
            # FIXME: Deal with differ length ?
            return (P, Y, H, C)

        results, updates = theano.scan(
            fn=step,
            sequences=[mask],
            outputs_info=[
                None,
                dict(initial=T.zeros((batch_size, self.embedding_size)), taps=[-1]),
                dict(initial=ec_H, taps=[-1]),
                dict(initial=ec_C, taps=[-1])
            ]
        )

        # return np.swapaxes(results[0], 0, 1)       # returns the softmax probabilities
        return results[0]
Example #58
 def init_prev_delta(self, parameter):
     parameter_shape = T.shape(parameter).eval()
     self.prev_delta = theano.shared(
         name="{}/prev-delta".format(parameter.name),
         value=asfloat(np.zeros(parameter_shape)),
     )
     return self.prev_delta
Example #59
    def get_ranks(self):
        p_ent1 = self.params['TransE_E'][self.px[:,0]]
        p_ent2 = self.params['TransE_E'][self.px[:,1]]
        
        def get_rank(i):
            ent1 = self.params['TransE_E'][self.px[i][0]]
            ent2 = self.params['TransE_E'][self.px[i][1]]
            rel = self.params['TransE_R'][self.py[i]]

            # all relation loss
            all_trans_disvec = ent1 + self.params['TransE_R'] - ent2
            all_transE_loss = T.batched_dot(all_trans_disvec, all_trans_disvec)
            #all_men_disvec = self.params['TransE_R'] - self.cnn_output[i]
            all_relcnn_loss = - T.dot(self.params['TransE_R'], self.cnn_output[i])

            all_loss = self.trans*all_transE_loss + self.theta*all_relcnn_loss
            
            # ground true relation loss
            gt_trans_disvec = ent1 + rel - ent2
            gt_transE_loss = T.dot(gt_trans_disvec, gt_trans_disvec)
            #gt_men_disvec = rel - self.cnn_output[i]
            gt_relcnn_loss = - T.dot(rel, self.cnn_output[i])
            gt_loss = self.trans*gt_transE_loss + self.theta*gt_relcnn_loss

            return T.sum(all_loss < gt_loss)

        res, _  = theano.scan(
                fn = get_rank,
                outputs_info = None,
                sequences = [T.arange(T.shape(self.py)[0])]
                )

        return T.mean(res)