Example #1
def momentum_normscaled(loss, all_params, lr, mom, batch_size, max_norm=np.inf, weight_decay=0.0,verbose=False):
    updates = []
    #all_grads = [theano.grad(loss, param) for param in all_params]
    all_grads = theano.grad(gradient_clipper(loss),all_params)

    grad_lst = [ T.sum( (  grad / float(batch_size) )**2  ) for grad in all_grads ]
    grad_norm = T.sqrt( T.sum( grad_lst ))
    if verbose:
        grad_norm = theano.printing.Print('MOMENTUM GRAD NORM1:')(grad_norm)

    all_grads = ifelse(T.gt(grad_norm, max_norm),
                       [grads*(max_norm / grad_norm) for grads in all_grads],
                       all_grads)


    if verbose:
        grad_lst = [ T.sum( (  grad / float(batch_size) )**2  ) for grad in all_grads ]
        grad_norm = T.sqrt( T.sum( grad_lst ))
        grad_norm = theano.printing.Print('MOMENTUM GRAD NORM2:')(grad_norm)
        # The condition can never be true (the norm cannot exceed inf); the ifelse only ties
        # the printed grad_norm into the graph so the debug output above is actually evaluated.
        all_grads = ifelse(T.gt(grad_norm, np.inf),
                           [grads*(max_norm / grad_norm) for grads in all_grads],
                           all_grads)

    for param_i, grad_i in zip(all_params, all_grads):
        mparam_i = theano.shared(np.zeros(param_i.get_value().shape, dtype=theano.config.floatX))
        v = mom * mparam_i - lr*(weight_decay*param_i + grad_i)

        updates.append( (mparam_i, v) )
        updates.append( (param_i, param_i + v) )

    return updates
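The clipping logic above is easier to see in isolation. Below is a minimal, self-contained sketch of the same joint-gradient-norm clipping step, assuming only Theano and NumPy (gradient_clipper and the rest of the training setup are specific to the original project):

import numpy as np
import theano
import theano.tensor as T
from theano.ifelse import ifelse

x = T.vector('x')
w = theano.shared(np.arange(3, dtype=theano.config.floatX), name='w')
loss = T.sum((w * x) ** 2)

grads = T.grad(loss, [w])
grad_norm = T.sqrt(sum(T.sum(g ** 2) for g in grads))
max_norm = np.asarray(1.0, dtype=theano.config.floatX)

# Rescale every gradient by max_norm / grad_norm only when the joint L2 norm is too large.
clipped = ifelse(T.gt(grad_norm, max_norm),
                 [g * (max_norm / grad_norm) for g in grads],
                 grads)

f = theano.function([x], [grad_norm, clipped[0]])
print(f(np.ones(3, dtype=theano.config.floatX)))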
Example #2
def T_subspacel1_slow_shrinkage_conv(a, L, lam_sparse, lam_slow, imshp,kshp,featshp,stride=(1,1),small_value=.001):
    featshp = (imshp[0],kshp[0],featshp[2],featshp[3]) # num images, features, szy, szx
    features = T.reshape(T.transpose(a),featshp,ndim=4)

    amp = T.sqrt(features[:,::2,:,:]**2 + features[:,1::2,:,:]**2 + small_value)
    #damp = amp[:,1:] - amp[:,:-1]

    # compose slow shrinkage with subspace l1 shrinkage

    # slow shrinkage
    div = T.zeros_like(amp)
    d1 = amp[1:,:,:,:] - amp[:-1,:,:,:]
    d2 = d1[1:,:,:,:] - d1[:-1,:,:,:]
    div = T.set_subtensor(div[1:-1,:,:,:], -d2)
    div = T.set_subtensor(div[0,:,:,:], -d1[0,:,:,:])
    div = T.set_subtensor(div[-1,:,:,:], d1[-1,:,:,:])
    slow_amp_shrinkage = 1 - (lam_slow / L) * (div / amp)
    slow_amp_value = T.switch(T.gt(slow_amp_shrinkage, 0), slow_amp_shrinkage, 0)
    slow_shrinkage_prox_a = slow_amp_value * features[:, ::2, :,:]
    slow_shrinkage_prox_b = slow_amp_value * features[:,1::2, :,:]

    # subspace l1 shrinkage
    amp_slow_shrinkage_prox = T.sqrt(slow_shrinkage_prox_a ** 2 + slow_shrinkage_prox_b ** 2)
    #amp_shrinkage = 1. - (lam_slow*lam_sparse/L)*amp_slow_shrinkage_prox
    amp_shrinkage = 1. - (lam_sparse / L) / amp_slow_shrinkage_prox
    amp_value = T.switch(T.gt(amp_shrinkage, 0.), amp_shrinkage, 0.)
    subspacel1_prox = T.zeros_like(features)
    subspacel1_prox = T.set_subtensor(subspacel1_prox[:, ::2, :,:], amp_value * slow_shrinkage_prox_a)
    subspacel1_prox = T.set_subtensor(subspacel1_prox[:,1::2, :,:], amp_value * slow_shrinkage_prox_b)

    reshape_subspacel1_prox = T.transpose(T.reshape(subspacel1_prox,(featshp[0],featshp[1]*featshp[2]*featshp[3]),ndim=2))
    return reshape_subspacel1_prox
Example #3
 def __init__(self, alpha, m, *args, **kwargs):
     super(Pareto, self).__init__(*args, **kwargs)
     self.alpha = alpha
     self.m = m
     self.mean = tt.switch(tt.gt(alpha, 1), alpha * m / (alpha - 1.0), np.inf)
     self.median = m * 2.0 ** (1.0 / alpha)
     self.variance = tt.switch(tt.gt(alpha, 2), (alpha * m ** 2) / ((alpha - 2.0) * (alpha - 1.0) ** 2), np.inf)
Example #4
def irprop_minus_updates(params, grads):

    # IRPROP- parameters
    positiveStep = 1.2
    negativeStep = 0.5
    maxStep = 50.
    minStep = 1e-6

    updates = []
    for param, gparam in zip(params, grads):
        # Per-parameter state, kept in shared variables so it persists between updates:
        # the previous gradient and the current per-element step size.
        last_gparam = theano.shared(numpy.zeros_like(param.get_value()), borrow=True)
        delta = theano.shared(0.1 * numpy.ones_like(param.get_value()), borrow=True)

        # calculate change
        change = T.sgn(gparam * last_gparam)
        # Grow the step where the gradient kept its sign, shrink it where the sign flipped,
        # and keep the result inside [minStep, maxStep]. T.switch is used because `change`
        # is symbolic, so a Python if/elif cannot branch on its value.
        new_delta = T.clip(
            T.switch(T.gt(change, 0), delta * positiveStep,
                     T.switch(T.lt(change, 0), delta * negativeStep, delta)),
            minStep, maxStep)
        # iRPROP-: after a sign change, forget the previous gradient so the next
        # iteration is treated as a fresh start.
        new_last_gparam = T.switch(T.lt(change, 0), T.zeros_like(gparam), gparam)

        # update the weights
        updates.append((param, param - T.sgn(gparam) * new_delta))
        # store the state for the next iteration
        updates.append((delta, new_delta))
        updates.append((last_gparam, new_last_gparam))

    return updates
Example #5
 def decay(self):
     updates = []
     new_batch = ifelse(T.gt(self.batch, self.decay_batch), sharedX(0), self.batch+1)
     new_lr = ifelse(T.gt(self.batch, self.decay_batch), self.lr*self.lr_decay_factor, self.lr)
     updates.append((self.batch, new_batch))
     updates.append((self.lr, new_lr))
     return updates
Example #6
def T_subspacel1_slow_shrinkage(a,L,lam_sparse,lam_slow,small_value=.001):
    amp = T.sqrt(a[::2,:]**2 + a[1::2,:]**2 + small_value)
    #damp = amp[:,1:] - amp[:,:-1]

    # compose slow shrinkage with subspace l1 shrinkage

    # slow shrinkage
    div = T.zeros_like(amp)
    d1 = amp[:,1:] - amp[:,:-1]
    d2 = d1[:,1:] - d1[:,:-1]
    div = T.set_subtensor(div[:,1:-1],-d2)
    div = T.set_subtensor(div[:,0], -d1[:,0])
    div = T.set_subtensor(div[:,-1], d1[:,-1])
    slow_amp_shrinkage = 1 - (lam_slow/L)*(div/amp)
    slow_amp_value = T.switch(T.gt(slow_amp_shrinkage,0),slow_amp_shrinkage,0)
    slow_shrinkage_prox_a = slow_amp_value*a[::2,:]
    slow_shrinkage_prox_b = slow_amp_value*a[1::2,:]

    # subspace l1 shrinkage
    amp_slow_shrinkage_prox = T.sqrt(slow_shrinkage_prox_a**2 + slow_shrinkage_prox_b**2)
    #amp_shrinkage = 1. - (lam_slow*lam_sparse/L)*amp_slow_shrinkage_prox
    amp_shrinkage = 1. - (lam_sparse/L)/amp_slow_shrinkage_prox
    amp_value = T.switch(T.gt(amp_shrinkage,0.),amp_shrinkage,0.)
    subspacel1_prox = T.zeros_like(a)
    subspacel1_prox = T.set_subtensor(subspacel1_prox[ ::2,:],amp_value*slow_shrinkage_prox_a)
    subspacel1_prox = T.set_subtensor(subspacel1_prox[1::2,:],amp_value*slow_shrinkage_prox_b)
    return subspacel1_prox
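A small usage sketch for this 2-D version, assuming the coefficient matrix packs each two-dimensional subspace into consecutive rows (which is why the function indexes a[::2] and a[1::2]); the values below are arbitrary:

import numpy as np
import theano
import theano.tensor as T

a = T.matrix('a')                      # (2 * n_subspaces, n_samples)
L, lam_sparse, lam_slow = T.scalars('L', 'lam_sparse', 'lam_slow')
prox = theano.function([a, L, lam_sparse, lam_slow],
                       T_subspacel1_slow_shrinkage(a, L, lam_sparse, lam_slow),
                       allow_input_downcast=True)

coeffs = np.random.randn(4, 6).astype(theano.config.floatX)
print(prox(coeffs, 1.0, 0.1, 0.1).shape)   # same shape as the input: (4, 6)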
Example #7
 def __init__(self, x, lower, upper, *args, **kwargs):
     super(Uniform, self).__init__(*args, **kwargs)
     self._logp = T.log(T.switch(T.gt(x, upper), 0, T.switch(T.lt(x, lower), 0, 1/(upper - lower))))
     self._cdf = T.switch(T.gt(x, upper), 1, T.switch(T.lt(x, lower), 0, (x - lower)/(upper - lower)))
     self._add_expr('x', x)
     self._add_expr('lower', lower)
     self._add_expr('upper', upper)
Example #8
	def errors(self, y, print_output=False):
		# check if y has the same dimension as y_pred
		if y.ndim != self.y_pred.ndim:
			raise TypeError('y should have the same shape as self.y_pred', ('y', y.type, 'y_pred', self.y_pred.type))

		# check if y is of the correct datatype
		if y.dtype.startswith('int'):
			num_positive = T.cast(T.sum(T.eq(y,1)),'float64')
			num_predicted_positive = T.cast(T.sum(T.eq(self.y_pred,1)),'float64')
			num_correctly_predicted = T.cast(T.sum(T.eq(self.y_pred*y,1)),'float64')

			# precision = True positive / (True positive + False positive)
			# The counts are symbolic, so the zero-denominator guards must be T.switch
			# expressions inside the graph; a Python `if` cannot branch on their values.
			P = T.switch(T.gt(num_predicted_positive, 0.0), num_correctly_predicted / num_predicted_positive, 0.0)

			# recall = True positive / (True positive + False negative)
			R = T.switch(T.gt(num_positive, 0.0), num_correctly_predicted / num_positive, 0.0)

			# F1 score
			F1 = T.switch(T.gt(P + R, 0.0), 2.0*P*R/(P+R), 0.0)

			if (print_output):
				print("      num positive = {0}".format( num_positive ) )
				print("      num predicted positive = {0}".format( num_predicted_positive ) )
				print("      num correctly predicted = {0}".format( num_correctly_predicted ) )
				print("      precision = {0}".format(P))
				print("      recall = {0}".format(R))
				print("      F1 score = {0}".format(F1))
			return [T.mean(T.neq(self.y_pred, y)), P, R, F1]

		else:
			raise NotImplementedError()
		return
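Since the counts are symbolic, the guarded divisions must stay inside the graph. A standalone sketch of the same precision/recall/F1 computation, using hypothetical y and y_pred integer vectors instead of the full layer class:

import numpy as np
import theano
import theano.tensor as T

y = T.ivector('y')
y_pred = T.ivector('y_pred')

num_positive = T.cast(T.sum(T.eq(y, 1)), 'float64')
num_predicted_positive = T.cast(T.sum(T.eq(y_pred, 1)), 'float64')
num_correctly_predicted = T.cast(T.sum(T.eq(y_pred * y, 1)), 'float64')

# T.switch keeps the divisions inside the graph and guards against zero denominators.
P = T.switch(T.gt(num_predicted_positive, 0), num_correctly_predicted / num_predicted_positive, 0.0)
R = T.switch(T.gt(num_positive, 0), num_correctly_predicted / num_positive, 0.0)
F1 = T.switch(T.gt(P + R, 0), 2.0 * P * R / (P + R), 0.0)

f = theano.function([y, y_pred], [P, R, F1])
print(f(np.array([1, 0, 1, 1], dtype='int32'), np.array([1, 0, 0, 1], dtype='int32')))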
Example #9
 def call(self, X):
     if type(X) is not list or len(X) != 2:
         raise Exception("SquareAttention must be called on a list of two tensors. Got: " + str(X))
         
     frame, position  = X[0], X[1]
     
     # Reshaping the input to exclude the time dimension
     frameShape = K.shape(frame)
     positionShape = K.shape(position)
     (chans, height, width) = frameShape[-3:]
     targetDim = positionShape[-1]
     frame = K.reshape(frame, (-1, chans, height, width))
     position = K.reshape(position, (-1, ) + (targetDim, ))
     
     # Applying the attention
     hw = THT.abs_(position[:, 2] - position[:, 0]) * self.scale / 2.0
     hh = THT.abs_(position[:, 3] - position[:, 1]) * self.scale / 2.0
     position = THT.maximum(THT.set_subtensor(position[:, 0], position[:, 0] - hw), -1.0)
     position = THT.minimum(THT.set_subtensor(position[:, 2], position[:, 2] + hw), 1.0)
     position = THT.maximum(THT.set_subtensor(position[:, 1], position[:, 1] - hh), -1.0)
     position = THT.minimum(THT.set_subtensor(position[:, 3], position[:, 3] + hh), 1.0)
     rX = Data.linspace(-1.0, 1.0, width)
     rY = Data.linspace(-1.0, 1.0, height)
     FX = THT.gt(rX, position[:,0].dimshuffle(0,'x')) * THT.le(rX, position[:,2].dimshuffle(0,'x'))
     FY = THT.gt(rY, position[:,1].dimshuffle(0,'x')) * THT.le(rY, position[:,3].dimshuffle(0,'x'))
     m = FY.dimshuffle(0, 1, 'x') * FX.dimshuffle(0, 'x', 1)
     m = m + self.alpha - THT.gt(m, 0.) * self.alpha
     frame = frame * m.dimshuffle(0, 'x', 1, 2)
     
     # Reshaping the frame to include time dimension
     output = K.reshape(frame, frameShape)
     
     return output
Example #10
    def multiclassRealPosAndNegAndTruePredPosNegTraining0OrValidation1(self, y, training0OrValidation1):
	"""
	The returned list has (numberOfClasses)x4 integers: >numberOfRealPositives, numberOfRealNegatives, numberOfTruePredictedPositives, numberOfTruePredictedNegatives< for each class (incl background).
	For class_i == 0 (backgr), what is reported is the WHOLE rp,rn,tpp,tpn. ie, as calculated considering background VS all other classes.
	Order in the list is the natural order of the classes (ie class-0-WHOLE RP,RN,TPP,TPN, class-1 RP,RN,TPP,TPN, class-2 RP,RN,TPP,TPN ...)
	"""
	returnedListWithNumberOfRpRnPpPnForEachClass = []

	for class_i in xrange(0, self.numberOfOutputClasses) :
		#Number of Real Positive, Real Negatives, True Predicted Positives and True Predicted Negatives are reported PER CLASS (first for WHOLE).
		vectorOneAtRealPositives = T.gt(y, 0) if class_i == 0 else T.eq(y, class_i)
		vectorOneAtRealNegatives = T.eq(y, 0) if class_i == 0 else T.neq(y, class_i)

		if training0OrValidation1 == 0 : #training:
			yPredToUse = self.y_pred
		else: #validation
			yPredToUse = self.y_pred_inference

		vectorOneAtPredictedPositives = T.gt(yPredToUse, 0) if class_i == 0 else T.eq(yPredToUse, class_i)
		vectorOneAtPredictedNegatives = T.eq(yPredToUse, 0) if class_i == 0 else T.neq(yPredToUse, class_i)
		vectorOneAtTruePredictedPositives = T.and_(vectorOneAtRealPositives,vectorOneAtPredictedPositives)
		vectorOneAtTruePredictedNegatives = T.and_(vectorOneAtRealNegatives,vectorOneAtPredictedNegatives)
		    
		returnedListWithNumberOfRpRnPpPnForEachClass.append( T.sum(vectorOneAtRealPositives) )
		returnedListWithNumberOfRpRnPpPnForEachClass.append( T.sum(vectorOneAtRealNegatives) )
		returnedListWithNumberOfRpRnPpPnForEachClass.append( T.sum(vectorOneAtTruePredictedPositives) )
		returnedListWithNumberOfRpRnPpPnForEachClass.append( T.sum(vectorOneAtTruePredictedNegatives) )

	return returnedListWithNumberOfRpRnPpPnForEachClass
Example #11
def norm_col(w, h):
    """normalize the column vector w (Theano function).
    Apply the invert normalization on h such that w.h does not change

    Parameters
    ----------
    w: Theano vector
        vector to be normalised
    h: Ttheano vector
        vector to be normalised by the invert normalistation

    Returns
    -------
    w : Theano vector with the same shape as w
        normalised vector (w/norm)
    h : Theano vector with the same shape as h
        h*norm
    """
    norm = w.norm(2, 0)
    eps = 1e-12
    size_norm = (T.ones_like(w)).norm(2, 0)
    w = ifelse(T.gt(norm, eps),
               w/norm,
               (w+eps)/(eps*size_norm).astype(theano.config.floatX))
    h = ifelse(T.gt(norm, eps),
               h*norm,
               (h*eps*size_norm).astype(theano.config.floatX))
    return w, h
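A quick usage sketch, assuming w and h are 1-D Theano vectors and that the preserved product is the outer product of w and h:

import numpy as np
import theano
import theano.tensor as T

w, h = T.vector('w'), T.vector('h')
w_n, h_n = norm_col(w, h)
f = theano.function([w, h], [w_n, h_n])

w0 = np.array([3., 4.], dtype=theano.config.floatX)
h0 = np.array([1., 2., 3.], dtype=theano.config.floatX)
w1, h1 = f(w0, h0)
print(np.linalg.norm(w1))                                # ~1.0 after normalisation
print(np.allclose(np.outer(w0, h0), np.outer(w1, h1)))   # the product is preserved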
Example #12
def calcColNormalizer(inMatrix):
    #Theano function for calculating logSum, i.e., calculate ln(X + Y) based on ln(X) and ln(Y).
    maxExp = -4950.0 
    x, y = T.fscalars(2)
    
    yMinusx = y - x  ## this part is for the condition which x > y
    xMinusy = x - y  # if x < y
    bigger = T.switch(T.gt(x, y), x, y)
    YSubtractX = T.switch(T.gt(x,y), yMinusx, xMinusy)       
    x_prime =  T.log(1 + T.exp(YSubtractX)) + bigger
    calcSum = T.switch(T.lt(YSubtractX, maxExp), bigger, x_prime)
    logSum = function([x, y], calcSum, allow_input_downcast=True)
    ####### end of logSum  ###############
    
    # now we calculate the sum of the log joint as the normalizer
    if len(inMatrix.shape) < 2:
        raise Exception("calcColNormalizer expects a 2D matrix")
    nRows, nCols = inMatrix.shape        
    columnAccumLogSum = np.zeros(nCols)        
    for col in range(nCols):
        currLogSum = np.NINF
        for j in range(nRows):
            if inMatrix[j,col] == np.NINF:
                continue
            currLogSum = logSum(currLogSum, inMatrix[j,col])             
        columnAccumLogSum[col] = currLogSum
        
    return columnAccumLogSum
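The inner logSum function can be sanity-checked against NumPy's log-add-exp reduction; a small sketch, assuming calcColNormalizer is in scope:

import numpy as np

logM = np.log(np.array([[0.1, 0.2],
                        [0.3, 0.4],
                        [0.6, 0.4]]))
print(calcColNormalizer(logM))            # column-wise log of the summed probabilities
print(np.logaddexp.reduce(logM, axis=0))  # should agree up to float32 rounding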
Example #13
def _backward_negative_z(inputs, weights, normed_relevances, bias=None):
    inputs_plus = inputs * T.gt(inputs, 0)
    weights_plus = weights * T.gt(weights, 0)
    inputs_minus = inputs * T.lt(inputs, 0)
    weights_minus = weights * T.lt(weights, 0)
    # Compute weights+ * inputs- and weights- * inputs+
    negative_part_a = conv2d(
        normed_relevances, weights_plus.dimshuffle(1, 0, 2, 3)[:, :, ::-1, ::-1], border_mode="full"
    )
    negative_part_a *= inputs_minus
    negative_part_b = conv2d(
        normed_relevances, weights_minus.dimshuffle(1, 0, 2, 3)[:, :, ::-1, ::-1], border_mode="full"
    )
    negative_part_b *= inputs_plus

    together = negative_part_a + negative_part_b
    if bias is not None:
        bias_negative = bias * T.lt(bias, 0)
        bias_relevance = bias_negative.dimshuffle("x", 0, "x", "x") * normed_relevances
        # Divide bias by weight size before convolving back
        # mean across channel, 0, 1 dims (hope this is correct?)
        fraction_bias = bias_relevance / T.prod(weights.shape[1:]).astype(theano.config.floatX)
        bias_rel_in = conv2d(
            fraction_bias, T.ones_like(weights).dimshuffle(1, 0, 2, 3)[:, :, ::-1, ::-1], border_mode="full"
        )
        together += bias_rel_in
    return together
Example #14
def group_div(X, W, H, beta, params):
    """Compute beta divergence D(X|WH), intra-class distance
    and intra-session distance for a particular
    (class, session) couple [1]_.


    Parameters
    ----------
    X : Theano tensor
        data
    W : Theano tensor
        Bases
    H : Theano tensor
        activation matrix
    beta : Theano scalar
    params : Theano tensor
        Matrix of parameters related to the class/session.
            :params[0][0]: index for the (class, session) couple
            :params[1][0]: number of vector basis related to class
            :params[1][1]: number of vector basis related to session
            :params[2]: weight on the class/session similarity constraints
            :params[3]: sessions in which class c appears
            :params[4]: classes present in session s



    Returns
    -------
    cost : Theano scalar
        total cost
    div : Theano scalar
        beta divergence D(X|WH)
    sum_cls : Theano scalar
        intra-class distance
    sum_ses : Theano scalar
        intra-session distance"""
    ind = params[0][0]
    k_cls = params[1][0]
    k_ses = params[1][1]
    lambdas = params[2]
    Sc = params[3]
    Cs = params[4]
    res_ses, up = theano.scan(
        fn=lambda Cs, prior_result: prior_result
        + eucl_dist(W[ind, :, k_cls : k_cls + k_ses], W[Cs, :, k_cls : k_cls + k_ses]),
        outputs_info=T.zeros_like(beta),
        sequences=Cs,
    )
    sum_ses = ifelse(T.gt(Cs[0], 0), res_ses[-1], T.zeros_like(beta))
    res_cls, up = theano.scan(
        fn=lambda Sc, prior_result: prior_result + eucl_dist(W[ind, :, 0:k_cls], W[Sc, :, 0:k_cls]),
        outputs_info=T.zeros_like(beta),
        sequences=Sc,
    )
    sum_cls = ifelse(T.gt(Sc[0], 0), res_cls[-1], T.zeros_like(beta))
    betaDiv = beta_div(X, W[ind].T, H, beta)

    cost = lambdas[0] * sum_cls + lambdas[1] * sum_ses + betaDiv
    return cost, betaDiv, sum_cls, sum_ses
Example #15
def symGivens2(a, b):
    """
    Stable Symmetric Givens rotation plus reflection

    Parameters

        a: (theano scalar) first element of a two-vector  [a; b]
        b: (theano scalar) second element of a two-vector [a; b]
    Returns

        c  cosine(theta), where theta is the implicit angle of
           rotation (counter-clockwise) in a plane-rotation
        s  sine(theta)
        d  two-norm of [a; b]

    Description:
        This method gives c and s such that
            [ c  s ][a] = [d],
            [ s -c ][b]   [0]
      where d = two norm of vector [a, b],
            c = a / sqrt(a^2 + b^2) = a / d,
            s = b / sqrt(a^2 + b^2) = b / d.
      The implementation guards against overflow in computing
         sqrt(a^2 + b^2).

      SEE ALSO:
         (1) Algorithm 4.9, stable *unsymmetric* Givens
         rotations in Golub and van Loan's book Matrix
         Computations, 3rd edition.
         (2) MATLAB's function PLANEROT.

      Observations:
          Implementing this function as a single op in C might improve speed
          considerably ..
    """
    c_branch1 = T.switch(T.eq(a, constantX(0)), constantX(1), T.sgn(a))
    c_branch21 = (a / b) * T.sgn(b) / T.sqrt(constantX(1) + (a / b) ** 2)
    c_branch22 = T.sgn(a) / T.sqrt(constantX(1) + (b / a) ** 2)

    c_branch2 = T.switch(T.eq(a, constantX(0)), constantX(0), T.switch(T.gt(abs(b), abs(a)), c_branch21, c_branch22))
    c = T.switch(T.eq(b, constantX(0)), c_branch1, c_branch2)

    s_branch1 = T.sgn(b) / T.sqrt(constantX(1) + (a / b) ** 2)
    s_branch2 = (b / a) * T.sgn(a) / T.sqrt(constantX(1) + (b / a) ** 2)
    s = T.switch(
        T.eq(b, constantX(0)),
        constantX(0),
        T.switch(T.eq(a, constantX(0)), T.sgn(b), T.switch(T.gt(abs(b), abs(a)), s_branch1, s_branch2)),
    )

    d_branch1 = b / (T.sgn(b) / T.sqrt(constantX(1) + (a / b) ** 2))
    d_branch2 = a / (T.sgn(a) / T.sqrt(constantX(1) + (b / a) ** 2))
    d = T.switch(
        T.eq(b, constantX(0)),
        abs(a),
        T.switch(T.eq(a, constantX(0)), abs(b), T.switch(T.gt(abs(b), abs(a)), d_branch1, d_branch2)),
    )
    return c, s, d
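The defining relations in the docstring (c = a/d, s = b/d, d = ||[a, b]||) can be checked with plain NumPy; the Theano graph above computes the same quantities in an overflow-safe way:

import numpy as np

a, b = 3.0, -4.0
d = np.hypot(a, b)
c, s = a / d, b / d
# Applying [[c, s], [s, -c]] to [a, b] should give [d, 0].
print(np.array([[c, s], [s, -c]]).dot(np.array([a, b])))   # -> [ 5.  0.]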
Example #16
    def __call__(self, input):
        mean = input.mean(self.axes, keepdims=True) 
        std = input.std(self.axes, keepdims=True) + self.epsilon 

        # Don't batch-normalise a single data point
        mean = ifelse(T.gt(input.shape[0], 1), mean, T.zeros(mean.shape, dtype=mean.dtype))
        std  = ifelse(T.gt(input.shape[0], 1), std, T.ones(std.shape, dtype=std.dtype))

        return (input - mean) * T.addbroadcast((self.gamma / std) + self.beta, *self.axes)
Example #17
def objective(y_true, y_pred, P, Q, alpha=0., beta=0.15, dbeta=0., gamma=0.01, gamma1=-1., poos=0.23, eps=1e-6):
    '''Expects a binary class matrix instead of a vector of scalar classes.
    '''

    beta = np.float32(beta)
    dbeta = np.float32(dbeta)
    gamma = np.float32(gamma)
    poos = np.float32(poos)
    eps = np.float32(eps)

    # scale preds so that the class probas of each sample sum to 1
    y_pred += eps
    y_pred /= y_pred.sum(axis=-1, keepdims=True)

    y_true = T.cast(y_true.flatten(), 'int64')
    y1 = T.and_(T.gt(y_true, 0), T.le(y_true, Q))  # in-set
    y0 = T.or_(T.eq(y_true, 0), T.gt(y_true, Q))  # out-of-set or unlabeled
    y0sum = y0.sum() + eps  # number of oos
    y1sum = y1.sum() + eps  # number of in-set
    # we want to reduce the cross entropy of the labeled data
    # convert all oos/unlabeled to label=0
    cost0 = T.nnet.categorical_crossentropy(y_pred, T.switch(y_true <= Q, y_true, 0))
    cost0 = T.dot(y1, cost0) / y1sum  # average cost per labeled example

    if alpha:
        cost1 = T.nnet.categorical_crossentropy(y_pred, y_pred)
        cost1 = T.dot(y0, cost1) / y0sum  # average cost per labeled example
        cost0 += alpha*cost1

    # we want to increase the average entropy in each batch
    # average over batch
    if beta:
        y_pred_avg0 = T.dot(y0, y_pred) / y0sum
        y_pred_avg0 = T.clip(y_pred_avg0, eps, np.float32(1) - eps)
        y_pred_avg0 /= y_pred_avg0.sum(axis=-1, keepdims=True)
        cost2 = T.nnet.categorical_crossentropy(y_pred_avg0.reshape((1,-1)), P-dbeta)[0] # [None,:]
        cost2 = T.switch(y0sum > 0.5, cost2, 0.)  # ignore cost2 if no samples
        cost0 += beta*cost2

    # binary classifier score
    if gamma:
        y_pred0 = T.clip(y_pred[:,0], eps, np.float32(1) - eps)
        if gamma1 < 0.:
            cost3 = - T.dot(poos*y0,T.log(y_pred0)) - T.dot(np.float32(1)-poos*y0.T,T.log(np.float32(1)-y_pred0))
            cost3 /= y_pred.shape[0]
            cost0 += gamma*cost3
        elif gamma1 > 0.:
            cost3 = - T.dot(poos*y0,T.log(y_pred0)) - T.dot((np.float32(1)-poos)*y0,T.log(np.float32(1)-y_pred0))
            cost3 /= y0sum
            cost31 = - T.dot(y1, T.log(np.float32(1) - y_pred0))
            cost31 /= y1sum
            cost0 += gamma*cost3 + gamma1*cost31
        else:  # gamma1 == 0.
            cost3 = - T.dot(poos*y0,T.log(y_pred0)) - T.dot((np.float32(1)-poos)*y0, T.log(np.float32(1)-y_pred0))
            cost3 /= y0sum
            cost0 += gamma*cost3
    return cost0
Example #18
def new_range(overflow, overflow_1, max_overflow):
    
    # the goal is to update the range of the vector 
    # we know the overflow rates associated with range (overflow) 
    # and range-1 (overflow_1)
    # if (overflow > max_overflow): increment range
    # else if (overflow_1 < max_overflow): decrement range
    return T.switch(T.gt(overflow, max_overflow), 1, 
        T.switch(T.gt(overflow_1, max_overflow), 0, - 1))
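A tiny usage sketch, assuming the two overflow rates and the threshold arrive as Theano scalars:

import theano
import theano.tensor as T

overflow, overflow_1, max_overflow = T.scalars('overflow', 'overflow_1', 'max_overflow')
step = theano.function([overflow, overflow_1, max_overflow],
                       new_range(overflow, overflow_1, max_overflow),
                       allow_input_downcast=True)
print(step(0.3, 0.4, 0.2))   # current range overflows too much -> widen it (+1)
print(step(0.1, 0.4, 0.2))   # only range-1 overflows          -> keep the range (0)
print(step(0.1, 0.1, 0.2))   # even range-1 is fine            -> shrink the range (-1)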
Example #19
def rprop(param,learning_rate,gparam,mask,updates,current_cost,previous_cost,
          eta_plus=1.2,eta_minus=0.5,max_delta=50, min_delta=10e-6):
    previous_grad = sharedX(numpy.ones(param.shape.eval()),borrow=True)
    delta = sharedX(learning_rate * numpy.ones(param.shape.eval()),borrow=True)
    previous_inc = sharedX(numpy.zeros(param.shape.eval()),borrow=True)
    zero = T.zeros_like(param)
    one = T.ones_like(param)
    change = previous_grad * gparam

    new_delta = T.clip(
            T.switch(
                T.eq(gparam,0.),
                delta,
                T.switch(
                    T.gt(change,0.),
                    delta*eta_plus,
                    T.switch(
                        T.lt(change,0.),
                        delta*eta_minus,
                        delta
                    )
                )
            ),
            min_delta,
            max_delta
    )
    new_previous_grad = T.switch(
            T.eq(mask * gparam,0.),
            previous_grad,
            T.switch(
                T.gt(change,0.),
                gparam,
                T.switch(
                    T.lt(change,0.),
                    zero,
                    gparam
                )
            )
    )
    inc = T.switch(
            T.eq(mask * gparam,0.),
            zero,
            T.switch(
                T.gt(change,0.),
                - T.sgn(gparam) * new_delta,
                T.switch(
                    T.lt(change,0.),
                    zero,
                    - T.sgn(gparam) * new_delta
                )
            )
    )

    updates.append((previous_grad,new_previous_grad))
    updates.append((delta,new_delta))
    updates.append((previous_inc,inc))
    return param + inc * mask
Example #20
def tukey_biweight(predictions, targets, c=4.685, s=1.4826):
    """
    Tukey's biweight function expressed in theano as in
    :param predictions: Prediction tensor
    :param targets: Target tensor
    :param c: Tukey tuning constant
    :param s: Consistence scale parameter
    :return: Cost function
    """
    # Flatten input to make calc easier
    pred = predictions.flatten(2)
    target = targets.flatten(2)
    # Compute mask
    mask = T.gt(target, 0)
    # Compute n of valid pixels
    n_valid = T.sum(mask, axis=1)
    # Apply mask and log transform
    m_pred = pred * mask
    m_t = T.switch(mask, target, 0)

    def median(tensor):
        """
        MAD tensor from https://groups.google.com/forum/#!topic/theano-users/I4eHjbAetEQ
        :param tensor: Input tensor
        :return: Median expression
        """
        tensor = tensor.flatten(1)
        return T.switch(T.eq((tensor.shape[0] % 2), 0),
                        # if even vector
                        T.mean(T.sort(tensor)[((tensor.shape[0] / 2) - 1): ((tensor.shape[0] / 2) + 1)]),
                        # if odd vector
                        T.sort(tensor)[tensor.shape[0] // 2])

    def mad(tensor):
        """
        Median absolute deviation
        :param tensor: Input tensor
        :return: MAD
        """
        med = median(tensor=tensor)
        return median(T.abs_(tensor - med))

    # Residual
    r_i = (m_pred - m_t)
    # r_i = r_i / (s * mad(r_i))
    r_i = r_i / r_i.std()
    # Compute the masking vectors
    tukey_mask = T.gt(T.abs_(r_i), c)

    # Cost
    cost = (c ** 2 / 6) * (1-(1 - (r_i / c) ** 2) ** 3)
    # Aggregate
    return T.sum(T.sum(T.switch(tukey_mask, (c ** 2) / 6., cost), axis=1)) / T.maximum((T.sum(n_valid)), 1)
Example #21
    def rebuild(self):
        for i, (inputs, f) in enumerate(self.wiring):
            if not inputs:
                continue

            lin_comb = T.dot(T.concatenate([self._vlayers[j] for j in inputs], axis=1), self._vweights[i])
            add_biases = lin_comb + self._vbiases[i]
            self._vlayers[i] = f(add_biases)

        self._output = T.concatenate([self._vlayers[j] for j in self.output_layers], axis=1)

        self._targets = [T.matrix() for j in self.output_layers]
        crossentropy = sum([(T.nnet.categorical_crossentropy(self._vlayers[j], self._targets[i])
                             if self.wiring[j][1] == SOFTMAX_FUN
                             else ((self._vlayers[j] - self._targets[i]) ** 2 / (1+self._targets[i].max())**2).sum())
                            for i, j in enumerate(self.output_layers)
                            ])

        self._cost = (crossentropy.sum() + 
                      self.L2REG/(self.layers[i]) * sum((weight**2).sum() for weight in self._vweights if weight is not None)+ # + # L2 regularization
                      0.01* self.L2REG/math.sqrt(self.layers[i]) * sum((bias**2).sum() for j, bias in enumerate(self._vbiases) if bias is not None and self.wiring[j][1] != LINEAR_FUN))  # L2 regularization

        self._costnoreg = crossentropy.sum()

        self._derivatives = [None] * len(self.layers)
        self._updates = []

        MAX_DERIV = 1000
        for i, (inputs, f) in enumerate(self.wiring):
            if not inputs:
                continue
            deriv1 = T.grad(self._cost, self._vweights[i])
            deriv1p = T.switch(T.lt(deriv1, MAX_DERIV), deriv1, MAX_DERIV)
            deriv1pp = T.switch(T.gt(deriv1p, -MAX_DERIV), deriv1p, -MAX_DERIV)
            #deriv1ppp = T.switch(T.isnan(deriv1pp), 0, deriv1pp)
            deriv2 = T.grad(self._cost, self._vbiases[i])
            deriv2p = T.switch(T.lt(deriv2, MAX_DERIV), deriv2, MAX_DERIV)
            deriv2pp = T.switch(T.gt(deriv2p, -MAX_DERIV), deriv2p, -MAX_DERIV)
            #deriv2ppp = T.switch(T.isnan(deriv2pp), 0, deriv2pp)

            self._derivatives[i] = (deriv1pp, deriv2pp)

            self._updates.append((self._vweights[i], self._vweights[i] - self.learning_rate * self._derivatives[i][0]))
            self._updates.append((self._vbiases[i], self._vbiases[i] - self.learning_rate * self._derivatives[i][1]))
        self._prediction = theano.function(inputs=[self._vlayers[i] for i in self.input_layers],
                                           outputs=self._output)
        self._train = theano.function(inputs=self._targets+[self._vlayers[i] for i in self.input_layers],
                                      outputs=self._cost,
                                      updates=self._updates, allow_input_downcast=True)
                                      #mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True)) # debug NaN
        self._costfun = theano.function(inputs=self._targets+[self._vlayers[i] for i in self.input_layers],
                                      outputs=self._costnoreg, allow_input_downcast=True)
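The chained T.switch calls applied to the derivatives above act as a symmetric clamp; a quick standalone check that they agree with T.clip:

import numpy as np
import theano
import theano.tensor as T

MAX_DERIV = 1000
g = T.vector('g')
g_upper = T.switch(T.lt(g, MAX_DERIV), g, MAX_DERIV)                   # cap from above
g_clamped = T.switch(T.gt(g_upper, -MAX_DERIV), g_upper, -MAX_DERIV)   # cap from below
f = theano.function([g], [g_clamped, T.clip(g, -MAX_DERIV, MAX_DERIV)])
print(f(np.array([-2000., 0., 5., 2000.], dtype=theano.config.floatX)))  # both outputs agree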
Example #22
def _forward_negative_z(inputs, weights, bias=None):
    inputs_plus = inputs * T.gt(inputs, 0)
    weights_plus = weights * T.gt(weights, 0)
    inputs_minus = inputs * T.lt(inputs, 0)
    weights_minus = weights * T.lt(weights, 0)
    negative_part_a = conv2d(inputs_plus, weights_minus)
    negative_part_b = conv2d(inputs_minus, weights_plus)
    together = negative_part_a + negative_part_b
    if bias is not None:
        bias_negative = bias * T.lt(bias, 0)
        together += bias_negative.dimshuffle("x", 0, "x", "x")

    return together
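The sign-splitting trick used here decomposes a tensor into its positive and negative parts, since x equals x * T.gt(x, 0) plus x * T.lt(x, 0); a minimal sketch:

import numpy as np
import theano
import theano.tensor as T

x = T.matrix('x')
x_plus = x * T.gt(x, 0)    # keeps only the positive entries
x_minus = x * T.lt(x, 0)   # keeps only the negative entries
f = theano.function([x], [x_plus, x_minus, x_plus + x_minus])
print(f(np.array([[1., -2.], [0., 3.]], dtype=theano.config.floatX)))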
Example #23
def relevance_conv_a_b_sign_switch(inputs, weights, out_relevances, a, b, bias=None):
    assert a is not None
    assert b is not None
    assert a - b == 1
    # For each input, determine what
    outputs = conv2d(inputs, weights)
    if bias is not None:
        outputs += bias.dimshuffle("x", 0, "x", "x")
        # do not use bias further, only to determine direction of outputs
        bias = None
    # stabilize
    # prevent division by 0 and division by small numbers
    eps = 1e-4
    outputs += T.sgn(outputs) * eps
    outputs += T.eq(outputs, 0) * eps
    positive_forward = _forward_positive_z(inputs, weights, bias)
    negative_forward = _forward_negative_z(inputs, weights, bias)
    rel_for_positive_outputs = out_relevances * T.gt(outputs, 0)
    rel_for_negative_outputs = out_relevances * T.lt(outputs, 0)

    positive_norm_with_trend = positive_forward * T.gt(outputs, 0)
    negative_norm_with_trend = negative_forward * T.lt(outputs, 0)
    # minus to make overall norm positive
    norm_with_trend = positive_norm_with_trend - negative_norm_with_trend
    # stabilize also
    norm_with_trend += T.eq(norm_with_trend, 0) * eps

    in_positive_with_trend = _backward_positive_z(inputs, weights, rel_for_positive_outputs / norm_with_trend, bias)
    in_negative_with_trend = _backward_negative_z(inputs, weights, rel_for_negative_outputs / norm_with_trend, bias)

    # Minus in_negative since in_with_trend should not switch signs
    in_with_trend = in_positive_with_trend - in_negative_with_trend

    positive_norm_against_trend = positive_forward * T.lt(outputs, 0)
    negative_norm_against_trend = negative_forward * T.gt(outputs, 0)
    # minus to make overall norm positive
    norm_against_trend = positive_norm_against_trend - negative_norm_against_trend
    # stabilize also
    norm_against_trend += T.eq(norm_against_trend, 0) * eps

    in_positive_against_trend = _backward_positive_z(
        inputs, weights, rel_for_negative_outputs / norm_against_trend, bias
    )
    in_negative_against_trend = _backward_negative_z(
        inputs, weights, rel_for_positive_outputs / norm_against_trend, bias
    )
    # Minus in_negative since switching signs is done below
    in_against_trend = in_positive_against_trend - in_negative_against_trend

    in_relevances = a * in_with_trend - b * in_against_trend
    return in_relevances
Example #24
def getOverlap(a, b):
    '''
    Given (batch_size, grid_num, box_num, 4) tensors, return 
    (batch_size, grid_num, box_num) overlap area
    '''
    a, b = get_bound(a), get_bound(b)
    xmin = get_max(a, b, 0)
    xmax = get_min(a, b, 2)
    ymin = get_max(a, b, 1)
    ymax = get_min(a, b, 3)
    xside, yside = xmax-xmin, ymax-ymin
    xside = T.switch(T.gt(xside, 0), xside, 0)
    yside = T.switch(T.gt(yside, 0), yside, 0)
    return xside * yside
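get_bound, get_max and get_min are helpers from the original project; written out in plain NumPy for two single boxes given as (xmin, ymin, xmax, ymax), the clamped-intersection idea reduces to:

import numpy as np

a = np.array([0.0, 0.0, 2.0, 2.0])
b = np.array([1.0, 1.0, 3.0, 3.0])
xside = min(a[2], b[2]) - max(a[0], b[0])
yside = min(a[3], b[3]) - max(a[1], b[1])
# Negative side lengths mean no overlap, so they are clamped to zero.
overlap = max(xside, 0.0) * max(yside, 0.0)
print(overlap)   # -> 1.0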
Example #25
    def __init__(self, alpha, m, *args, **kwargs):
        super(Pareto, self).__init__(*args, **kwargs)
        self.alpha = alpha
        self.m = m
        self.mean = tt.switch(tt.gt(alpha, 1), alpha *
                              m / (alpha - 1.), np.inf)
        self.median = m * 2.**(1. / alpha)
        self.variance = tt.switch(
            tt.gt(alpha, 2),
            (alpha * m**2) / ((alpha - 2.) * (alpha - 1.)**2),
            np.inf)

        assert_negative_support(alpha, 'alpha', 'Pareto')
        assert_negative_support(m, 'm', 'Pareto')
Example #26
    def fprop(self, state_below):
        print "======fprop====="
        
        
        rng = RandomStreams(seed=234)

        #size = theano.tensor.as_tensor_variable((state_below.shape[0], self.dim))
        self.noise = rng.normal(size=(state_below.shape[0], self.dim), avg=0, std=self.std)
        #self.noise = T.log(un/(1-un))
        p = self._linear_part(state_below) + self.noise

        batch_size = (p.shape[0]).astype(config.floatX)
        self.active_rate = T.gt(p, self.threshold).sum(axis=0, dtype=config.floatX) / batch_size
        
        return T.gt(p, self.threshold) * p
Example #27
    def logp(self, X):
        n = self.n
        p = self.p
        V = self.V

        IVI = det(V)
        IXI = det(X)

        return bound(
            ((n - p - 1) * log(IXI) - trace(matrix_inverse(V).dot(X)) -
                n * p * log(2) - n * log(IVI) - 2 * multigammaln(n / 2., p)) / 2,
            gt(n, (p - 1)),
            all(gt(eigh(X)[0], 0)),
            eq(X, X.T)
        )
Example #28
 def test_v2(self):
     q_matrix = self.q
     d_matrix = self.d[0 :: self.negative_d_num + 1]
     cosine_vector = self.compute_cosine_between_matrixes(q_matrix, d_matrix)
     for i in range(1, self.negative_d_num + 1):
         q_matrix = self.q
         d_matrix = self.d[i :: self.negative_d_num + 1]
         cosine_vector = T.concatenate([cosine_vector, self.compute_cosine_between_matrixes(q_matrix, d_matrix)])
     components_reshape = T.reshape(cosine_vector, (self.negative_d_num + 1, self.mini_batch_size)).T
     gt_1 = T.sum(T.gt(components_reshape[:, 0], components_reshape[:, 1]))
     gt_2 = T.sum(T.gt(components_reshape[:, 0], components_reshape[:, 2]))
     gt_3 = T.sum(T.gt(components_reshape[:, 0], components_reshape[:, 3]))
     gt_4 = T.sum(T.gt(components_reshape[:, 0], components_reshape[:, 4]))
     gt_sum = gt_1 + gt_2 + gt_3 + gt_4
     return components_reshape, gt_sum * 1.0 / (self.mini_batch_size * self.negative_d_num)
Example #29
def drop(input_value, dropout):
	# dropout is a plain Python float here; a symbolic T.gt() comparison cannot be
	# evaluated by a Python `if`, so the check is an ordinary comparison.
	if dropout > 0.:
		retain_prob = 1 - dropout
		mask = srng.binomial(n=1, p=retain_prob, size=input_value.shape, dtype='floatX')
		return input_value * mask / retain_prob
	else:
		return input_value
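A minimal usage sketch, assuming srng is a module-level RandomStreams instance (as the snippet implies) and dropout is a plain Python float:

import numpy as np
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

srng = RandomStreams(seed=42)
x = T.matrix('x')
f = theano.function([x], drop(x, 0.5))
# Roughly half the entries are zeroed; the survivors are rescaled by 1/retain_prob.
print(f(np.ones((2, 4), dtype=theano.config.floatX)))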
Example #30
		def timestep(predictions, label, len_example, total_len_example):

			label_binary = T.gt(label[0:len_example-1], 0)
			oov_count = T.shape(label_binary)[0] - T.sum(label_binary)
			
			a = total_len_example
			return T.sum(T.log( 1./ predictions[T.arange(len_example-1), label[0:len_example-1]]) * label_binary ), oov_count
Example #31
        def step_fn(curr, r, h_prev):

            # r is symbolic, so the branch has to be taken inside the graph with
            # T.switch rather than with a Python if/else (which cannot see r's value).
            recurrent_hid = self.alpha * h_prev + \
                (1 - self.alpha) * self.f(curr + T.dot(self.W2, h_prev))
            curr_hid = T.switch(T.gt(r, 0), recurrent_hid, self.f(curr))

            curr_hid = T.flatten(curr_hid)
            return curr_hid
Example #32
    def logp(self, value):
        mu = self.mu
        sigma = self.sigma
        nu = self.nu

        # This condition suggested by exGAUS.R from gamlss
        lp = T.switch(T.gt(nu,  0.05 * sigma),
                      - T.log(nu) + (mu - value) / nu + 0.5 * (sigma / nu)**2
                      + logpow(std_cdf((value - mu) / sigma - sigma / nu), 1.),
                      - T.log(sigma * T.sqrt(2 * np.pi))
                      - 0.5 * ((value - mu) / sigma)**2)
        return bound(lp, sigma > 0., nu > 0.)
Example #33
 def loss_confident_bootstrapping(self, y, factor=1):
     #Customized categorical cross entropy.
     #Based on the multibox impl. More tuned to paper. More strict
     p = self.output
     #Only confident predictions are included. Everything between 0.2 and 0.8 is disregarded. 60% of the range.
     hardUpper = T.gt(p, 0.8)
     hardLower = T.le(p, 0.2)
     loss = (-T.sum(
         ((factor * y) + ((1.0 - factor) * hardUpper)) * T.log(p)) - T.sum(
             ((factor * (1.0 - y)) +
              ((1.0 - factor) * hardLower)) * T.log(1.0 - p)))
     return loss / self.size
Example #34
    def __init__(self, alpha, beta=1, *args, **kwargs):
        super(InverseGamma, self).__init__(*args, **kwargs)
        self.alpha = alpha = tt.as_tensor_variable(alpha)
        self.beta = beta = tt.as_tensor_variable(beta)

        self.mean = self._calculate_mean()
        self.mode = beta / (alpha + 1.)
        self.variance = tt.switch(tt.gt(alpha, 2),
                                  (beta**2) / (alpha * (alpha - 1.)**2),
                                  np.inf)
        assert_negative_support(alpha, 'alpha', 'InverseGamma')
        assert_negative_support(beta, 'beta', 'InverseGamma')
Example #35
    def logp(self, value):
        mu = self.mu
        alpha = self.alpha
        negbinom = bound(binomln(value + alpha - 1, value)
                         + logpow(mu / (mu + alpha), value)
                         + logpow(alpha / (mu + alpha), alpha),
                         value >= 0, mu > 0, alpha > 0)

        # Return Poisson when alpha gets very large.
        return tt.switch(tt.gt(alpha, 1e10),
                         Poisson.dist(self.mu).logp(value),
                         negbinom)
Example #36
 def loss_crosstrapping(self, y, factor=1):
     #Almost the same as bootstrapping, except mean used for overall result.
     #More closely follows crossentropy implementation.
     #When factor is 1, crossentropy equals this implementation. So performance
     #without decreasing factor should be the same!
     p = self.output
     hard = T.gt(p, 0.5)
     cross = -(((factor * y * T.log(p)) +
                ((1.0 - factor) * hard * T.log(p))) +
               ((factor * (1.0 - y) * T.log(1.0 - p)) +
                ((1.0 - factor) * (1.0 - hard) * T.log(1.0 - p))))
     return T.mean(cross)
Example #37
    def step(i, x, *args):
        x_i = x[T.arange(x.shape[0]), i]
        x_reversed = T.set_subtensor(x_i, 1.0 - x_i)
        merged = T.concatenate([x, x_reversed], axis=0)

        eng = energy(merged).flatten()
        eng_x = eng[:x.shape[0]]
        eng_r = eng[x.shape[0]:]
        cond = T.gt(eng_x, eng_r)
        # The update values
        updated = T.switch(cond, x_i, 1.0 - x_i)
        return T.set_subtensor(x_i, updated)
Example #38
    def in_transit(self, t, r=0.0, texp=None):
        """Get a list of timestamps that are in transit

        Args:
            t (vector): A vector of timestamps to be evaluated.
            r (Optional): The radii of the planets.
            texp (Optional[float]): The exposure time.

        Returns:
            The indices of the timestamps that are in transit.

        """

        z = tt.zeros_like(self.a)
        r = tt.as_tensor_variable(r) + z
        R = self.r_star + z

        # Wrap the times into time since transit
        hp = 0.5 * self.period
        dt = tt.mod(self._warp_times(t) - self.t0 + hp, self.period) - hp

        if self.ecc is None:
            # Equation 14 from Winn (2010)
            k = r / self.r_star
            arg = tt.square(1 + k) - tt.square(self.b)
            hdur = hp * tt.arcsin(self.r_star / self.a *
                                  tt.sqrt(arg) / self.sin_incl) / np.pi
            t_start = -hdur
            t_end = hdur
            flag = z

        else:
            M_contact = self.contact_points_op(
                self.a, self.ecc, self.cos_omega, self.sin_omega,
                self.cos_incl + z, self.sin_incl + z, R + r)
            flag = M_contact[2]

            t_start = (M_contact[0] - self.M0) / self.n
            t_start = tt.mod(t_start + hp, self.period) - hp
            t_end = (M_contact[1] - self.M0) / self.n
            t_end = tt.mod(t_end + hp, self.period) - hp

        if texp is not None:
            t_start -= 0.5*texp
            t_end += 0.5*texp

        mask = tt.any(tt.and_(dt >= t_start, dt <= t_end), axis=-1)
        result = ifelse(tt.and_(tt.all(tt.eq(flag, 0)),
                                tt.all(tt.gt(t_end, t_start))),
                        tt.arange(t.size)[mask],
                        tt.arange(t.size))

        return result
Example #39
    def logp(self, value):
        psi = self.psi
        theta = self.theta

        logp_val = tt.switch(tt.gt(value, 0),
                     tt.log(psi) + self.pois.logp(value),
                     logaddexp(tt.log1p(-psi), tt.log(psi) - theta))

        return bound(logp_val,
            0 <= value,
            0 <= psi, psi <= 1,
            0 <= theta)
Example #40
    def __init__(self,
                 f,
                 θs,
                 α=0.001,
                 β1=0.9,
                 β2=0.999,
                 β3=0.999,
                 k=0.1,
                 K=10.,
                 ε=1e-8,
                 dec=0.):
        α, β1, β2, β3, ε, dec = [
            np.cast[floatX](h) for h in [α, β1, β2, β3, ε, dec]
        ]
        # `one` is not defined in this excerpt; it is assumed to be the floatX constant 1
        # used throughout the update rules below.
        one = np.cast[floatX](1)

        t = theano.shared(0, name="t")
        t_u = (t, t + 1)

        f_prev = theano.shared(np.cast[floatX](0), name="f_prev")

        ch_fact_lbound = T.switch(T.gt(f, f_prev), 1 + k, 1 / (1 + K))
        ch_fact_ubound = T.switch(T.gt(f, f_prev), 1 + K, 1 / (1 + k))
        f_ch_fact = f / f_prev
        f_ch_fact = T.switch(T.lt(f_ch_fact, ch_fact_lbound), ch_fact_lbound,
                             f_ch_fact)
        f_ch_fact = T.switch(T.gt(f_ch_fact, ch_fact_ubound), ch_fact_ubound,
                             f_ch_fact)
        f_hat = T.switch(T.gt(t_u[1], 1), f_prev * f_ch_fact, f)
        f_u = (f_prev, f_hat)

        self.ms = [
            theano.shared(np.zeros(θ.shape.eval(), dtype=floatX),
                          borrow=True,
                          name="m") for θ in θs
        ]
        self.vs = [
            theano.shared(np.zeros(θ.shape.eval(), dtype=floatX),
                          borrow=True,
                          name="v") for θ in θs
        ]

        d = theano.shared(one, name="d")
        d_den = T.switch(T.gt(f_hat, f_prev), f_prev, f_hat)
        d_t = (β3 * d) + (one - β3) * T.abs_((f_hat - f_prev) / d_den)
        d_t = T.switch(T.gt(t_u[1], one), d_t, one)
        d_u = (d, d_t)

        gs = T.grad(f, θs)

        m_us = [(m, β1 * m + (one - β1) * g) for m, g in zip(self.ms, gs)]
        m_hats = [m_u[1] / (one - T.pow(β1, t_u[1])) for m_u in m_us]

        v_us = [(v, β2 * v + (one - β2) * T.sqr(g))
                for v, g in zip(self.vs, gs)]
        v_hats = [v_u[1] / (one - T.pow(β2, t_u[1])) for v_u in v_us]

        θ_us = [(θ, θ - (α / (one + (t_u[1] * dec))) * m_hat /
                 ((T.sqrt(v_hat) * d_t) + ε))
                for θ, m_hat, v_hat in zip(θs, m_hats, v_hats)]
        self.updates = m_us + v_us + [t_u, f_u, d_u] + θ_us
Example #41
    def multiclassRealPosAndNegAndTruePredPosNegTraining0OrValidation1(
            self, y, training0OrValidation1):
        """
	The returned list has (numberOfClasses)x4 integers: >numberOfRealPositives, numberOfRealNegatives, numberOfTruePredictedPositives, numberOfTruePredictedNegatives< for each class (incl background).
	For class_i == 0 (backgr), what is reported is the WHOLE rp,rn,tpp,tpn. ie, as calculated considering background VS all other classes.
	Order in the list is the natural order of the classes (ie class-0-WHOLE RP,RN,TPP,TPN, class-1 RP,RN,TPP,TPN, class-2 RP,RN,TPP,TPN ...)
	"""
        returnedListWithNumberOfRpRnPpPnForEachClass = []

        for class_i in xrange(0, self.numberOfOutputClasses):
            #Number of Real Positive, Real Negatives, True Predicted Positives and True Predicted Negatives are reported PER CLASS (first for WHOLE).
            vectorOneAtRealPositives = T.gt(y, 0) if class_i == 0 else T.eq(
                y, class_i)
            vectorOneAtRealNegatives = T.eq(y, 0) if class_i == 0 else T.neq(
                y, class_i)

            if training0OrValidation1 == 0:  #training:
                yPredToUse = self.y_pred
            else:  #validation
                yPredToUse = self.y_pred_inference

            vectorOneAtPredictedPositives = T.gt(
                yPredToUse, 0) if class_i == 0 else T.eq(yPredToUse, class_i)
            vectorOneAtPredictedNegatives = T.eq(
                yPredToUse, 0) if class_i == 0 else T.neq(yPredToUse, class_i)
            vectorOneAtTruePredictedPositives = T.and_(
                vectorOneAtRealPositives, vectorOneAtPredictedPositives)
            vectorOneAtTruePredictedNegatives = T.and_(
                vectorOneAtRealNegatives, vectorOneAtPredictedNegatives)

            returnedListWithNumberOfRpRnPpPnForEachClass.append(
                T.sum(vectorOneAtRealPositives))
            returnedListWithNumberOfRpRnPpPnForEachClass.append(
                T.sum(vectorOneAtRealNegatives))
            returnedListWithNumberOfRpRnPpPnForEachClass.append(
                T.sum(vectorOneAtTruePredictedPositives))
            returnedListWithNumberOfRpRnPpPnForEachClass.append(
                T.sum(vectorOneAtTruePredictedNegatives))

        return returnedListWithNumberOfRpRnPpPnForEachClass
Example #42
    def _step(
            i,
            pkm1, pkm2, qkm1, qkm2,
            k1, k2, k3, k4, k5, k6, k7, k8, r
    ):
        xk = -(x * k1 * k2) / (k3 * k4)
        pk = pkm1 + pkm2 * xk
        qk = qkm1 + qkm2 * xk
        pkm2 = pkm1
        pkm1 = pk
        qkm2 = qkm1
        qkm1 = qk

        xk = (x * k5 * k6) / (k7 * k8)
        pk = pkm1 + pkm2 * xk
        qk = qkm1 + qkm2 * xk
        pkm2 = pkm1
        pkm1 = pk
        qkm2 = qkm1
        qkm1 = qk

        old_r = r
        r = tt.switch(tt.eq(qk, zero), r, pk/qk)

        k1 += one
        k2 += k26update
        k3 += two
        k4 += two
        k5 += one
        k6 -= k26update
        k7 += two
        k8 += two

        big_cond = tt.gt(tt.abs_(qk) + tt.abs_(pk), BIG)
        biginv_cond = tt.or_(
            tt.lt(tt.abs_(qk), BIGINV),
            tt.lt(tt.abs_(pk), BIGINV)
        )

        pkm2 = tt.switch(big_cond, pkm2 * BIGINV, pkm2)
        pkm1 = tt.switch(big_cond, pkm1 * BIGINV, pkm1)
        qkm2 = tt.switch(big_cond, qkm2 * BIGINV, qkm2)
        qkm1 = tt.switch(big_cond, qkm1 * BIGINV, qkm1)

        pkm2 = tt.switch(biginv_cond, pkm2 * BIG, pkm2)
        pkm1 = tt.switch(biginv_cond, pkm1 * BIG, pkm1)
        qkm2 = tt.switch(biginv_cond, qkm2 * BIG, qkm2)
        qkm1 = tt.switch(biginv_cond, qkm1 * BIG, qkm1)

        return ((pkm1, pkm2, qkm1, qkm2,
                 k1, k2, k3, k4, k5, k6, k7, k8, r),
                until(tt.abs_(old_r - r) < (THRESH * tt.abs_(r))))
Example #43
    def from_partial_old(self, X, dX):
        eps = 1e-10  #np.spacing(1)
        U, S, V = X
        dU, dS, dV = dX
        S = tensor.diag(S)
        S_pinv = tensor.switch(tensor.gt(abs(S), eps), 1.0 / S, 0.0)
        S_pinv = tensor.diag(S_pinv)
        ZV = dU.dot(S_pinv)
        UtZV = dS
        ZtU = S_pinv.dot(dV)

        Zproj = (ZV - U.dot(UtZV), UtZV, ZtU - (UtZV.dot(V)))
        return Zproj
Example #44
def triplet_loss(predictions, triplets):

    # Value of the margin alpha
    a = np.float32(0.2)

    # Compute the distances between the anchor/positive and anchor/negative representations
    dist1 = ((predictions[triplets[:,0]] - predictions[triplets[:,1]])**2).sum(axis=1)
    dist2 = ((predictions[triplets[:, 0]] - predictions[triplets[:, 2]]) ** 2).sum(axis=1)
    s = dist1 - dist2 + a
    # Compute the loss
    loss = s * T.gt(s, 0.0)

    return loss
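A small usage sketch, assuming predictions is an (N, d) matrix of embeddings and triplets is an integer matrix of (anchor, positive, negative) row indices:

import numpy as np
import theano
import theano.tensor as T

predictions = T.matrix('predictions')
triplets = T.imatrix('triplets')
f = theano.function([predictions, triplets], triplet_loss(predictions, triplets))

emb = np.array([[0., 0.], [0., 1.], [3., 0.]], dtype=theano.config.floatX)
idx = np.array([[0, 1, 2]], dtype='int32')
print(f(emb, idx))   # dist1=1, dist2=9: the margin is already satisfied, so the hinge term is zero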
Example #45
 def build_target_label_prediction(self, valid_out, loss_type, K):
     """
     Picks the target vector for each class, as well as a strategy for
     picking the predicted label from the network output
     """
     if re.search('one-hot', loss_type):
         # should be used with softmax out
         identity = numpy.identity(K)
         pred_valid = T.argmax(valid_out, axis=1)
         klass_targets = identity
     elif re.search('nnrank', loss_type):
         # should be used with sigmoid out
         if self.num_output_classes == K:
             nnrank_target = numpy.tril(numpy.ones((K,K)))
             pred_valid = T.sum(T.gt(valid_out, 0.5), axis=1) - 1 # can potentially return -1
         elif self.num_output_classes == (K-1):
             nnrank_target = numpy.array([[0]*(K-1)] + numpy.tril(numpy.ones((K-1,K-1))).tolist())
             # TODO check for discontinuities rather than assuming none with the sum
             # TODO do better than a shared threshold
             pred_valid = T.sum(T.gt(valid_out, 0.5), axis=1)
         klass_targets = nnrank_target
     return(theano.shared(lasagne.utils.floatX((klass_targets))),pred_valid)
Example #46
def define_test_functions(disc_nonlinearity, prediction, prediction_det,
                          target_var_sup):
    if disc_nonlinearity in ["sigmoid", "softmax", "softmax_hierarchy"]:
        if disc_nonlinearity == "sigmoid":
            test_pred = T.gt(prediction_det, 0.5)
            test_acc = T.mean(T.eq(test_pred, target_var_sup),
                              dtype=theano.config.floatX) * 100.

        elif disc_nonlinearity in ["softmax", "softmax_hierarchy"]:
            test_pred = prediction_det.argmax(1)
            test_acc = T.mean(T.eq(test_pred, target_var_sup.argmax(1)),
                              dtype=theano.config.floatX) * 100
        return test_acc, test_pred
Example #47
    def logp(self, value):
        psi = self.psi
        p = self.p
        n = self.n

        logp_val = tt.switch(
            tt.gt(value, 0),
            tt.log(psi) + self.bin.logp(value),
            logaddexp(tt.log1p(-psi),
                      tt.log(psi) + n * tt.log1p(-p)))

        return bound(logp_val, 0 <= value, value <= n, 0 <= psi, psi <= 1,
                     0 <= p, p <= 1)
Example #48
    def get_output_for(self, input, deterministic=False, **kwargs):

        # print(super(snn_denseLayer, self).get_output_for(input, **kwargs))
        self.input = input
        v = self.v_in + super(snn_denseLayer, self).get_output_for(
            input, **kwargs)
        # v=super(snn_denseLayer, self).get_output_for(input, **kwargs)
        vmax = T.max(v)
        flag = T.gt(vmax, self.threshold)
        self.output_spike = T.switch(T.eq(vmax, v), flag, 0.0)
        self.v_out = flag * self.refractory_voltage + (1.0 - flag) * v
        #sample_net.do_stdp()
        return self.output_spike
Example #49
    def logp(self, value):
        alpha = self.alpha
        mu = self.mu
        psi = self.psi

        logp_val = tt.switch(tt.gt(value, 0),
                     tt.log(psi) + self.nb.logp(value),
                     logaddexp(tt.log1p(-psi), tt.log(psi) + alpha * (tt.log(alpha) - tt.log(alpha + mu))))

        return bound(logp_val,
                    0 <= value,
                    0 <= psi, psi <= 1,
                    mu > 0, alpha > 0)
Example #50
def triplet_loss(predictions, triplets):

    #loss = 0.0
    a = np.float32(0.2)

    dist1 = ((predictions[triplets[:, 0]] -
              predictions[triplets[:, 1]])**2).sum(axis=1)
    dist2 = ((predictions[triplets[:, 0]] -
              predictions[triplets[:, 2]])**2).sum(axis=1)
    s = dist1 - dist2 + a
    loss = s * T.gt(s, 0.0)

    return loss
Example #51
 def _get_updates_for(self, param, grad):
     grad_tm1 = util.shared_like(param, 'grad')
     step_tm1 = util.shared_like(param, 'step', self.learning_rate.eval())
     test = grad * grad_tm1
     diff = TT.lt(test, 0)
     steps = step_tm1 * (TT.eq(test, 0) +
                         TT.gt(test, 0) * self.step_increase +
                         diff * self.step_decrease)
     step = TT.minimum(self.max_step, TT.maximum(self.min_step, steps))
     grad = grad - diff * grad
     yield param, TT.sgn(grad) * step
     yield grad_tm1, grad
     yield step_tm1, step
Example #52
    def __init__(self,input,truth,mask):
        self.output_shape = input.output_shape
        Layer.linkstruct[input].append(self)
        #2 parts, 0<o<t t<o

        diff = truth.resp*0.8 - input.output
        diff = T.switch(T.gt(diff,0),diff,0)
        diff = T.switch(T.eq(truth.resp,0),1,diff)
        diffsmul = T.prod(diff, axis=1)
        loss = T.sum(diffsmul * mask)
        self.loss = loss
        self.output = truth.resp
        self.output_shape = truth.resp_shape
Example #53
        def OneStep(alpha1, b1, alpha2, b2):
            alpha1_new = (T.abs_(b1 * D * W).sum() /
                          T.abs_(b1 * D).sum()).astype('float32')
            b1_new = T.switch(T.gt(W / alpha1_new, 0.5), 1., 0.)
            alpha2_new = (T.abs_(b2 * D * W).sum() /
                          T.abs_(b2 * D).sum()).astype('float32')
            b2_new = T.switch(T.lt(W / alpha2_new, -0.5), -1., 0.)

            delta1 = T.abs_(alpha1_new - alpha1)
            delta2 = T.abs_(alpha2_new - alpha2)
            # Combine the two symbolic conditions with T.and_; Python's `and` would not
            # build the intended symbolic conjunction.
            condition = T.and_(T.lt(delta1, 1e-6), T.lt(delta2, 1e-6))
            return [alpha1_new, b1_new, alpha2_new,
                    b2_new], theano.scan_module.until(condition)
Example #54
 def get_monitoring_channels_from_state(self, state, target=None):
     channels = super(MultiSigmoid,
                      self).get_monitoring_channels_from_state(
                          state, target)
      for c in list(channels):  # iterate over a copy of the keys so entries can be deleted
         if 'misclass' in c:
             del channels[c]
     z, = state.owner.inputs
     geo = T.nnet.sigmoid(z.mean(axis=1).dimshuffle(0, 'x'))
     geo_class = T.gt(geo, 0.5)
     misclass = T.cast(T.neq(geo_class, target), config.floatX).mean()
     channels['misclass'] = misclass
     return channels
Example #55
    def __init__(self,
                 input,
                 n_in,
                 n_out,
                 is_binary=False,
                 threshold=0.4,
                 rng=None):
        """
        Initialize the parameters of the logistic regression.
        :type input: theano.tensor.TensorType
        :param input: symbolic variable that describes the input of the
        architecture (one minibatch)
        :type n_in: int
        :param n_in: number of input units, the dimension of the space in which
        the datapoints lie
        :type n_out: int
        :param n_out: number of output units, the dimension of the space in
        which the labels lie (number of classes)
        """
        self.activation = T.nnet.sigmoid
        self.threshold = threshold
        super(LogisticRegressionLayer, self).__init__(input, n_in, n_out,
                                                      self.activation, rng)

        self.reset_layer()

        self.is_binary = is_binary
        if n_out == 1:
            self.is_binary = True
        # The number of classes
        self.n_classes_seen = np.zeros(n_out)
        # The number of wrong classifications made for class i
        self.n_wrong_classif_made = np.zeros(n_out)

        self.reset_conf_mat()

        # Compute vector class-membership probabilities in symbolic form
        # self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W)+ self.b)
        self.p_y_given_x = self.get_class_memberships(self.input)

        if not self.is_binary:
            # Compute prediction as class whose probability is maximal
            # in symbolic form
            self.y_decision = T.argmax(self.p_y_given_x, axis=1)
        else:
            # If the probability is greater than the specified threshold,
            # assign class 1, otherwise class 0. Equivalently, check
            # whether p(y=1|x) > threshold.
            self.y_decision = T.gt(T.flatten(self.p_y_given_x), self.threshold)

        self.params = [self.W, self.b]
Example #56
def rprop(param,
          learning_rate,
          gparam,
          mask,
          updates,
          current_cost,
          previous_cost,
          eta_plus=1.2,
          eta_minus=0.5,
          max_delta=50,
          min_delta=10e-6):
    previous_grad = sharedX(numpy.ones(param.shape.eval()), borrow=True)
    delta = sharedX(learning_rate * numpy.ones(param.shape.eval()),
                    borrow=True)
    previous_inc = sharedX(numpy.zeros(param.shape.eval()), borrow=True)
    zero = T.zeros_like(param)
    one = T.ones_like(param)
    change = previous_grad * gparam

    new_delta = T.clip(
        T.switch(
            T.eq(gparam, 0.), delta,
            T.switch(T.gt(change, 0.), delta * eta_plus,
                     T.switch(T.lt(change, 0.), delta * eta_minus, delta))),
        min_delta, max_delta)
    new_previous_grad = T.switch(
        T.eq(mask * gparam, 0.), previous_grad,
        T.switch(T.gt(change, 0.), gparam,
                 T.switch(T.lt(change, 0.), zero, gparam)))
    inc = T.switch(
        T.eq(mask * gparam, 0.), zero,
        T.switch(T.gt(change, 0.), -T.sgn(gparam) * new_delta,
                 T.switch(T.lt(change, 0.), zero, -T.sgn(gparam) * new_delta)))

    updates.append((previous_grad, new_previous_grad))
    updates.append((delta, new_delta))
    updates.append((previous_inc, inc))
    return param + inc * mask
Example #57
def momentum(cost, params, current_epoch, lr, init_momentum):
    grads = T.grad(cost=cost, wrt=params)
    updates = []
    for p, g in zip(params, grads):
        vel = theano.shared(p.get_value() * 0.)
        current_momentum = theano.shared(floatX(init_momentum))
        vel_new = current_momentum * vel - lr * g
        momentum_new = T.le(current_epoch, 200.) * (
            current_epoch *
            (0.99 - 0.09) / 200. + 0.09) + T.gt(current_epoch, 200.) * 0.99
        updates.append((vel, vel_new))
        updates.append((p, p + vel_new))
        updates.append((current_momentum, momentum_new))
    return updates
Example #58
    def find_right_bound(prev_func_output, step, maxstep):
        func_output = f(step)
        is_output_decrease = T.gt(prev_func_output, func_output)
        step = ifelse(
            is_output_decrease,
            T.minimum(2. * step, maxstep),
            step
        )

        is_output_increase = T.lt(prev_func_output, func_output)
        stoprule = theano.scan_module.until(
            T.or_(is_output_increase, step > maxstep)
        )
        )
        return [func_output, step], stoprule
Example #59
    def __mapper(self, train_example):
        pos_triple, neg_triple = train_example[0:3], train_example[3:]

        unconstrained_objective = self.margin - self.__objective_triple(neg_triple) \
                                  + self.__objective_triple(pos_triple)

        entity_normalize = T.sum(T.square(self.Entity.norm(2, axis=0)) - 1)
        relation_normalize = T.square(self.Relation.norm(2, axis=0))
        surface_normalize = T.square(T.diagonal(T.dot(self.RelationNormal.T, self.Relation))) / relation_normalize

        surface_normalize = T.sum(surface_normalize - self.epsilon ** 2)

        unconstrained_objective_positive = ifelse(T.gt(unconstrained_objective, theano.shared(0.0)),
                                                  unconstrained_objective, theano.shared(0.0))

        entity_normalize_positive = ifelse(T.gt(entity_normalize, theano.shared(0.0)),
                                           entity_normalize, theano.shared(0.0))

        surface_normalize_positive = ifelse(T.gt(surface_normalize, theano.shared(0.0)),
                                            surface_normalize, theano.shared(0.0))

        return unconstrained_objective_positive + self.regularize_factor \
                                                  * (surface_normalize_positive + entity_normalize_positive)
Example #60
    def get_probs(self):
        t = self.temperatures
        t_term = (1. / t - T.roll(1. / t, shift=-1))
        t_term = T.set_subtensor(t_term[-1], 0)

        e_term = self.energy_(self.pps) - T.roll(self.energy_(self.pps),
                                                 shift=-1)
        e_term = T.set_subtensor(e_term[-1], 0.)
        probs = T.exp(t_term * e_term)

        actions = T.cast(T.gt(probs, self.t_rng.uniform((probs.shape))), fx)

        add = T.concatenate([[np.cast[fx](0.)], actions])

        add = T.roll(add, shift=-1) - add
        add = add[:-1]
        add = T.switch(T.gt(add, 0), 1., 0.)
        add = T.set_subtensor(add[-1], 0.)
        add = add - T.roll(add, shift=1)
        idx = T.arange(actions.shape[0], dtype=fx)
        idx = idx + add

        return self.energy_(self.pps)