Example #1
def _get_targets(y, log_y_hat, y_mask, y_hat_mask):
    '''
    Returns the target values according to the CTC cost with respect to y_hat.
    Note that this is part of the gradient with respect to the softmax output
    and not with respect to the input of the original softmax function.
    All computations are done in log scale
    '''
    num_classes = log_y_hat.shape[2] - 1
    blanked_y, blanked_y_mask = _add_blanks(
        y=y,
        blank_symbol=num_classes,
        y_mask=y_mask)

    log_alpha, log_beta = _log_forward_backward(blanked_y,
                                                log_y_hat, blanked_y_mask,
                                                y_hat_mask, num_classes)
    # explicitly not using a mask to prevent inf - inf
    y_prob = _class_batch_to_labeling_batch(blanked_y, log_y_hat,
                                            y_hat_mask=None)
    marginals = log_alpha + log_beta - y_prob
    max_marg = marginals.max(2)
    max_marg = T.switch(T.le(max_marg, -np.inf), 0, max_marg)
    log_Z = T.log(T.exp(marginals - max_marg[:,:, None]).sum(2))
    log_Z = log_Z + max_marg
    log_Z = T.switch(T.le(log_Z, -np.inf), 0, log_Z)
    targets = _labeling_batch_to_class_batch(blanked_y,
                                             T.exp(marginals -
                                                   log_Z[:,:, None]),
                                             num_classes + 1)
    return targets
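The normalization above is a max-shifted log-sum-exp in which T.le against -np.inf guards the case where an entire row of marginals is -inf (otherwise the shift produces inf - inf and then nan). A stripped-down sketch of that guard on a plain 2-D matrix, assuming only Theano and NumPy, could look like this:

import numpy as np
import theano
import theano.tensor as T

marginals = T.fmatrix('marginals')   # rows: positions, columns: labels
max_marg = marginals.max(1)
# if a whole row is -inf its max is -inf; replace it with 0 so that
# exp(marginals - max_marg) does not turn into inf - inf
max_marg = T.switch(T.le(max_marg, -np.inf), 0, max_marg)
log_Z = T.log(T.exp(marginals - max_marg[:, None]).sum(1)) + max_marg
log_Z = T.switch(T.le(log_Z, -np.inf), 0, log_Z)
normalized = T.exp(marginals - log_Z[:, None])

f = theano.function([marginals], normalized)
x = np.array([[0., np.log(3.)], [-np.inf, -np.inf]], dtype=np.float32)
print(f(x))  # first row sums to 1; the all -inf row stays at 0 instead of nan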
Example #2
def get_rain_level(vals):
    return TT.switch(
        TT.le(vals, 0.1), 0,
        TT.switch(
            TT.le(vals, 2.5), 1,
            TT.switch(TT.le(vals, 8.0), 2, TT.switch(TT.le(vals, 16.0), 3,
                                                     4))))
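A minimal usage sketch for the function above (assuming Theano is installed and TT is bound to theano.tensor, as in the snippet): compile it once and bucket a batch of rainfall values.

import numpy as np
import theano
import theano.tensor as TT

vals = TT.fvector('vals')
rain_level = theano.function([vals], get_rain_level(vals))
print(rain_level(np.array([0.05, 1.0, 5.0, 12.0, 30.0], dtype=np.float32)))
# expected buckets: [0 1 2 3 4]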
Example #3
 def get_output_for(self, input, **kwargs):
     if self.tied_feamap:
         return input * T.gt(input, 0) + input * T.le(input, 0) \
              * T.shape_padleft(T.shape_padright(self.W[seg], n_ones = len(input_dim) - 2))
     else:
         return input * T.gt(input, 0) + input * T.le(input, 0) \
              * T.shape_padleft(self.W)
Example #4
def depth(r, b):
    # depth = 1 - s0 / pi; where s0 is from Agol+
    b = tt.abs_(b)
    r = tt.abs_(r)
    b2 = b ** 2
    r2 = r ** 2
    opr = 1 + r
    omr = 1 - r
    rmo = r - 1

    # Case 2
    a = kite_area(r, b)
    twoa = 2 * a
    k0 = tt.arctan2(twoa, rmo * opr + b2)
    k1 = tt.arctan2(twoa, omr * opr + b2)
    case2 = (k1 + r2 * k0 - a) / np.pi

    return tt.switch(
        tt.le(opr, b),
        tt.zeros_like(r),
        tt.switch(
            tt.and_(tt.lt(tt.abs_(omr), b), tt.lt(b, opr)),
            case2,
            tt.switch(tt.le(b, omr), r2, tt.ones_like(r)),
        ),
    )
Example #5
def sticky_ALB(o, t, o2, v, alpha_0, beta_0, alpha_1, beta_1, d, tau_p, tau_n, unchosen_p, b, stickiness):
    b = 1. / b  # Convert inverse temperature to temperature

    # Implements choice stickiness
    unchosen_0 = T.switch(T.le(v, 0.5), 1, unchosen_p)
    unchosen_1 = T.switch(T.gt(v, 0.5), 1, unchosen_p)

    # Only update if outcome isn't missing
    alpha_0 = T.switch(T.ge(o, 0), (1 - d) * alpha_0 + (o * tau_p * unchosen_0), alpha_0)
    beta_0 = T.switch(T.ge(o, 0), (1 - d) * beta_0 + ((1 - o) * tau_n * unchosen_0), beta_0)

    alpha_1 = T.switch(T.ge(o2, 0), (1 - d) * alpha_1 + (o2 * tau_p * unchosen_1), alpha_1)
    beta_1 = T.switch(T.ge(o2, 0), (1 - d) * beta_1 + ((1 - o2) * tau_n * unchosen_1), beta_1)

    value_0 = alpha_0 / (alpha_0 + beta_0)
    value_1 = alpha_1 / (alpha_1 + beta_1)

    value_0 = T.switch(T.le(v, 0.5), T.pow(value_0, stickiness), value_0)
    value_1 = T.switch(T.gt(v, 0.5), T.pow(value_1, stickiness), value_1)

    value = ((value_0 - value_1) + 1) / 2.

    var_0 = (alpha_0 * beta_0) / (T.pow(alpha_0 + beta_0, 2) * (alpha_0 + beta_0 + 1))
    var_1 = (alpha_1 * beta_1) / (T.pow(alpha_1 + beta_1, 2) * (alpha_1 + beta_1 + 1))

    value = np.exp(b * value) / (np.exp(b * value) + np.exp(b * (1 - value)))

    return (value, alpha_0, beta_0, alpha_1, beta_1, var_0, var_1, value_0, value_1, o, o2)
Example #6
    def prepareTraining(self):
        '''
        Prepares the relevant functions
        (details on neural_net_creator's prepareTraining)
        '''
        #loss objective to minimize
        self.prediction = lasagne.layers.get_output(self.network)
        self.prediction=self.prediction[:,0]
        #self.loss = lasagne.objectives.categorical_crossentropy(self.prediction, self.target_var)
        #the loss is now the squared error in the output
        self.loss =  lasagne.objectives.squared_error(self.prediction, self.target_var)
        self.loss = self.loss.mean()

        self.params = lasagne.layers.get_all_params(self.network, trainable=True)
        self.updates = lasagne.updates.nesterov_momentum(
                self.loss, self.params, learning_rate=0.01, momentum=0.9)

        self.test_prediction = lasagne.layers.get_output(self.network, deterministic=True)
        self.test_prediction=self.test_prediction[:,0]
        self.test_loss = lasagne.objectives.squared_error(self.test_prediction, self.target_var)
        self.test_loss = self.test_loss.mean()
        #the accuracy is now the fraction of samples within a 0.01 tolerance of the target (threshold can be changed)
        self.test_acc = T.mean(T.le(T.abs_(T.sub(self.test_prediction,self.target_var)),0.01)
                            , dtype=theano.config.floatX)
        self.test_acc2 = T.mean(T.le(T.abs_(T.sub(self.test_prediction,self.target_var)),0.05)
                            , dtype=theano.config.floatX)
        self.test_acc3 = T.mean(T.le(T.abs_(T.sub(self.test_prediction,self.target_var)),0.1)
                            , dtype=theano.config.floatX)

        self.train_fn = theano.function([self.input_var, self.target_var], self.loss, updates=self.updates)

        self.val_fn = theano.function([self.input_var, self.target_var], [self.test_loss,self.test_acc,self.test_acc2,self.test_acc3])

        self.use = theano.function([self.input_var],[self.test_prediction])
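The three accuracy expressions above share one pattern: T.le on the absolute prediction error turns a tolerance check into a 0/1 mask whose mean is the fraction of samples within tolerance. A tiny self-contained sketch of that metric (assuming Theano; the 0.01 tolerance matches test_acc above):

import numpy as np
import theano
import theano.tensor as T

pred = T.fvector('pred')
target = T.fvector('target')
# fraction of predictions within 0.01 of the target
acc = T.mean(T.le(T.abs_(pred - target), 0.01), dtype=theano.config.floatX)
acc_fn = theano.function([pred, target], acc)
print(acc_fn(np.array([1.005, 2.5], dtype=np.float32),
             np.array([1.0, 2.0], dtype=np.float32)))  # 0.5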
Example #7
 def init(self, input, input_shape, posslope, negslope):
     self.input = input
     #        assert( posslope[-1]=="Constant")#learning slope not yet implemented
     #        assert( negslope[-1]=="Constant")
     #
     if negslope[-1] == "Constant":
         self.output = T.ge(input, 0.) * input * posslope[2] + T.lt(
             input, 0.) * input * negslope[2]
         self.input_shape = input_shape
         self.output_shape = input_shape
         self.params = []
     elif negslope[-1] == "Learn":
         self.pos_slope_sym = theano.shared(
             posslope[2] *
             np.ones([input_shape[0], 1], dtype=theano.config.floatX),
             borrow=True,
             broadcastable=(False, True))
         self.neg_slope_sym = theano.shared(
             negslope[2] *
             np.ones([input_shape[0], 1], dtype=theano.config.floatX),
             borrow=True,
             broadcastable=(False, True))
         self.output = T.ge(input, 0) * input * T.ge(
             self.pos_slope_sym, posslope[0]) * T.le(
                 self.pos_slope_sym, posslope[1]
             ) * self.pos_slope_sym + T.lt(input, 0) * input * T.ge(
                 self.neg_slope_sym, negslope[0]) * T.le(
                     self.neg_slope_sym, negslope[1]) * self.neg_slope_sym
         self.params = [self.pos_slope_sym, self.neg_slope_sym]
         self.input_shape = input_shape
         self.output_shape = input_shape
     else:
         assert (False)
Example #8
def get_targets(y, log_y_hat, y_mask, y_hat_mask):
    """
    Returns the target values according to the CTC cost with respect to y_hat.
    Note that this is part of the gradient with respect to the softmax output
    and not with respect to the input of the original softmax function.
    All computations are done in log scale
    """
    # log_y_hat is input_seq_len x num_batch x num_classes + 1
    num_classes = log_y_hat.shape[2] - 1
    blanked_y, blanked_y_mask = _add_blanks(y=y,
                                            blank_symbol=num_classes,
                                            y_mask=y_mask)

    log_alpha, log_beta = _log_forward_backward(blanked_y, log_y_hat,
                                                blanked_y_mask, y_hat_mask,
                                                num_classes)
    # explicitly not using a mask to prevent inf - inf
    y_prob = _class_batch_to_labeling_batch(blanked_y,
                                            log_y_hat,
                                            y_hat_mask=None)
    marginals = log_alpha + log_beta - y_prob
    max_marg = marginals.max(2)
    max_marg = T.switch(T.le(max_marg, -numpy.inf), 0, max_marg)
    log_Z = T.log(T.exp(marginals - max_marg[:, :, None]).sum(2))
    log_Z = log_Z + max_marg
    log_Z = T.switch(T.le(log_Z, -numpy.inf), 0, log_Z)
    targets = _labeling_batch_to_class_batch(
        blanked_y, T.exp(marginals - log_Z[:, :, None]), num_classes + 1)
    return targets
Example #9
 def call(self, X):
     if type(X) is not list or len(X) != 2:
         raise Exception("SquareAttention must be called on a list of two tensors. Got: " + str(X))
         
     frame, position  = X[0], X[1]
     
     # Reshaping the input to exclude the time dimension
     frameShape = K.shape(frame)
     positionShape = K.shape(position)
     (chans, height, width) = frameShape[-3:]
     targetDim = positionShape[-1]
     frame = K.reshape(frame, (-1, chans, height, width))
     position = K.reshape(position, (-1, ) + (targetDim, ))
     
     # Applying the attention
     hw = THT.abs_(position[:, 2] - position[:, 0]) * self.scale / 2.0
     hh = THT.abs_(position[:, 3] - position[:, 1]) * self.scale / 2.0
     position = THT.maximum(THT.set_subtensor(position[:, 0], position[:, 0] - hw), -1.0)
     position = THT.minimum(THT.set_subtensor(position[:, 2], position[:, 2] + hw), 1.0)
     position = THT.maximum(THT.set_subtensor(position[:, 1], position[:, 1] - hh), -1.0)
     position = THT.minimum(THT.set_subtensor(position[:, 3], position[:, 3] + hh), 1.0)
     rX = Data.linspace(-1.0, 1.0, width)
     rY = Data.linspace(-1.0, 1.0, height)
     FX = THT.gt(rX, position[:,0].dimshuffle(0,'x')) * THT.le(rX, position[:,2].dimshuffle(0,'x'))
     FY = THT.gt(rY, position[:,1].dimshuffle(0,'x')) * THT.le(rY, position[:,3].dimshuffle(0,'x'))
     m = FY.dimshuffle(0, 1, 'x') * FX.dimshuffle(0, 'x', 1)
     m = m + self.alpha - THT.gt(m, 0.) * self.alpha
     frame = frame * m.dimshuffle(0, 'x', 1, 2)
     
     # Reshaping the frame to include time dimension
     output = K.reshape(frame, frameShape)
     
     return output
Example #10
def depth_grad(r, b):
    # depth = 1 - s0 / pi; where s0 is from Agol+
    b = tt.abs_(b)
    r = tt.abs_(r)
    b2 = b**2
    opr = 1 + r
    omr = 1 - r
    rmo = r - 1

    # Case 2
    a = kite_area(r, b)
    twor = 2 * r
    twoa = 2 * a
    k0 = tt.arctan2(twoa, rmo * opr + b2)
    dr = twor * k0 / np.pi
    db = -twoa / (b * np.pi)

    zero = tt.zeros_like(r)
    return (
        tt.switch(
            tt.le(opr, b),
            zero,
            tt.switch(
                tt.and_(tt.lt(tt.abs_(omr), b), tt.lt(b, opr)),
                dr,
                tt.switch(tt.le(b, omr), twor, zero),
            ),
        ),
        tt.switch(
            tt.le(opr, b),
            zero,
            tt.switch(tt.and_(tt.lt(tt.abs_(omr), b), tt.lt(b, opr)), db,
                      zero),
        ),
    )
Example #11
    def inner(x, A, b, u, l, L, eps):
        gradient = T.dot(A, x) + b
        x2 = x - gradient / L
        x2 = T.switch(T.le(x2, l), l, x2)
        x2 = T.switch(T.ge(x2, u), u, x2)

        d = T.max(T.abs_(x - x2))
        condition = T.le(T.max(d), eps)

        return (x2, d), theano.scan_module.until(condition)
Example #12
		def OneStep(alpha, b):
			# minimize alpha
			alpha_new  = (T.abs_(b*D*W).sum()/T.abs_(b*D).sum()).astype('float32') 
			# minimize b
			tmp_new = T.clip(W/alpha_new, -1., 1.)
			b_new =  T.switch( T.ge(tmp_new, pow(2, -n)), T.pow(2, round3(T.log2(tmp_new)-0.0849625)), 
				T.switch( T.le(tmp_new, -pow(2, -n)), -T.pow(2, round3(T.log2(-tmp_new)-0.0849625)), 0.))		
			b_new = T.switch(T.ge(b_new, pow(2, - (n-1))), b_new, 
				T.switch(T.le(b_new, -pow(2, -(n-1))), b_new, T.sgn(b_new)*pow(2, -(n-1))))
		
			delta = T.abs_(alpha_new-alpha)
			condition = T.lt(delta, 1e-6)
			return [alpha_new, b_new], theano.scan_module.until(condition)
Example #13
def apply_mean_stress_theory(m_s_th, sm, rng, sn_0, r_m, r_y):
    rng = ifelse(
        tt.eq(1, m_s_th),
        ifelse(tt.lt(0, sm), rng / (1 - (sm / r_m)),
               ifelse(tt.le(r_m, tt.abs_(sm)), 1.01 * sn_0, rng)),
        ifelse(
            tt.eq(2, m_s_th),
            ifelse(tt.lt(tt.abs_(sm), r_m), rng / (1 - (sm / r_m)**2),
                   ifelse(tt.le(r_m, sm), 1.01 * sn_0, rng)),
            ifelse(
                tt.eq(3, m_s_th),
                ifelse(
                    tt.lt(0, sm) & tt.lt(sm, r_y), rng / (1 - (sm / r_y)),
                    ifelse(tt.le(r_y, tt.abs_(sm)), 1.01 * sn_0, rng)), rng)))
    return rng
Example #14
        def calc_time_gate(time_input_n):
            # Broadcast the time across all units
            t_broadcast = time_input_n.dimshuffle([0,'x'])
            # Get the time within the period
            in_cycle_time = T.mod(t_broadcast + shift_broadcast, period_broadcast)
            # Find the phase
            is_up_phase = T.le(in_cycle_time, on_mid_broadcast)
            is_down_phase = T.gt(in_cycle_time, on_mid_broadcast)*T.le(in_cycle_time, on_end_broadcast)
            # Set the mask
            sleep_wake_mask = T.switch(is_up_phase, in_cycle_time/on_mid_broadcast,
                                T.switch(is_down_phase,
                                    (on_end_broadcast-in_cycle_time)/on_mid_broadcast,
                                        off_slope*(in_cycle_time/period_broadcast)))

            return sleep_wake_mask
Example #15
def gate_layer(tparams,
               X_word,
               X_char,
               options,
               prefix,
               pretrain_mode,
               activ='lambda x: x',
               **kwargs):
    """ 
    compute the forward pass for a gate layer

    Parameters
    ----------
    tparams        : OrderedDict of theano shared variables, {parameter name: value}
    X_word         : theano 3d tensor, word input, dimensions: (num of time steps, batch size, dim of vector)
    X_char         : theano 3d tensor, char input, dimensions: (num of time steps, batch size, dim of vector)
    options        : dictionary, {hyperparameter: value}
    prefix         : string, layer name
    pretrain_mode  : theano shared scalar, 0. = word only, 1. = char only, 2. = word & char
    activ          : string, activation function: 'linear', 'tanh', or 'rectifier'

    Returns
    -------
    X              : theano 3d tensor, final vector, dimensions: (num of time steps, batch size, dim of vector)

    """
    # compute gating values, Eq.(3)
    G = tensor.nnet.sigmoid(
        tensor.dot(X_word, tparams[p_name(prefix, 'v')]) +
        tparams[p_name(prefix, 'b')][0])
    X = ifelse(
        tensor.le(pretrain_mode, numpy.float32(1.)),
        ifelse(tensor.eq(pretrain_mode, numpy.float32(0.)), X_word, X_char),
        G[:, :, None] * X_char + (1. - G)[:, :, None] * X_word)
    return eval(activ)(X)
Example #16
def concat_layer(tparams,
                 X_word,
                 X_char,
                 options,
                 prefix,
                 pretrain_mode,
                 activ='lambda x: x',
                 **kwargs):
    """ 
    compute the forward pass for a concat layer

    Parameters
    ----------
    tparams        : OrderedDict of theano shared variables, {parameter name: value}
    X_word         : theano 3d tensor, word input, dimensions: (num of time steps, batch size, dim of vector)
    X_char         : theano 3d tensor, char input, dimensions: (num of time steps, batch size, dim of vector)
    options        : dictionary, {hyperparameter: value}
    prefix         : string,  layer name
    pretrain_mode  : theano shared scalar, 0. = word only, 1. = char only, 2. = word & char
    activ          : string, activation function: 'linear', 'tanh', or 'rectifier'

    Returns
    -------
    X              : theano 3d tensor, final vector, dimensions: (num of time steps, batch size, dim of vector)

    """
    X = ifelse(
        tensor.le(pretrain_mode, numpy.float32(1.)),
        ifelse(tensor.eq(pretrain_mode, numpy.float32(0.)), X_word, X_char),
        tensor.dot(tensor.concatenate([X_word, X_char], axis=2),
                   tparams[p_name(prefix, 'W')]) +
        tparams[p_name(prefix, 'b')])
    return eval(activ)(X)
Example #17
 def __init__(self, input, sigma=20.0, window_radius=60):
     self.input = input
     self.sigma = theano.shared(value=np.array(sigma, dtype=theano.config.floatX), name='sigma')
     apply_blur = T.gt(self.sigma, 0.0)
     no_blur = T.le(self.sigma, 0.0)
     self.output = ifelse(no_blur, input, gaussian_filter(input.dimshuffle('x', 0, 1), self.sigma, window_radius)[0, :, :])
     self.params = [self.sigma]
Example #18
	def logp_loss3(self, x, y, fake_label, neg_label, pos_ratio=0.5):  # adopt maxout for negatives
		# pos_ratio sets the weight given to positive examples (0.5 means an equal 1:1 weighting)


		print "adopt  positives  weight  ............. "+str(pos_ratio)
		y = y.dimshuffle((1,0))
		inx = x.dimshuffle((1,0))
		fake_mask = T.neq(y, fake_label)
		y = y*fake_mask

		pos_mask = T.and_(fake_mask, T.le(y, neg_label-1))*pos_ratio
		neg_mask = T.ge(y, neg_label)*(1- pos_ratio)


		pos_score, neg_score = self.structure2(inx,False)
		maxneg = T.max(neg_score, axis = -1)

		scores = T.concatenate((pos_score, maxneg.dimshuffle((0,1,'x'))), axis = 2)

		d3shape = scores.shape

		#seq*batch , label
		scores = scores.reshape((d3shape[0]*d3shape[1],  d3shape[2]))
		pro = T.nnet.softmax(scores)

		_logp = T.nnet.categorical_crossentropy(pro, y.flatten())

		_logp = _logp.reshape(fake_mask.shape)

		loss = (T.sum(_logp*pos_mask)+ T.sum(_logp*neg_mask))/ (T.sum(pos_mask)+T.sum(neg_mask))
		pos_loss = T.sum(_logp*pos_mask)
		neg_loss = T.sum(_logp*neg_mask)


		return loss, pos_loss, neg_loss
Example #19
def tied_neighbours(preds, n_sample_preds, n_classes):
    eps = 1e-8
    #preds = T.clip(preds, eps, 1-eps)
    preds_per_trial_row = preds.reshape((-1, n_sample_preds, n_classes))
    earlier_neighbours = preds_per_trial_row[:, :-1]
    later_neighbours = preds_per_trial_row[:, 1:]
    # Have to now ensure first values are larger zero
    # for numerical stability :/
    # Example of problem otherwise:
    """
    a = T.fmatrix()
    b = T.fmatrix()
    soft_out_a =softmax(a)
    soft_out_b =softmax(b)
    
    loss = categorical_crossentropy(soft_out_a[:,1:],soft_out_b[:,:-1])
    neigh_fn = theano.function([a,b], loss)
    
    neigh_fn(np.array([[0,1000,0]], dtype=np.float32), 
        np.array([[0.1,0.9,0.3]], dtype=np.float32))
    -> inf
    """

    # renormalize(?)

    earlier_neighbours = (T.gt(earlier_neighbours, eps) * earlier_neighbours +
                          T.le(earlier_neighbours, eps) * earlier_neighbours +
                          eps)
    loss = categorical_crossentropy(earlier_neighbours, later_neighbours)
    return loss
Example #20
    def compile(self):
        # 1D: n_words, 2D: batch * n_cands
        self.x = T.imatrix()
        self.y = T.fvector()
        self.train_inputs = [self.x, self.y]
        self.pred_inputs = [self.x]

        self.activation = self.args.activation
        self.n_d = self.args.hidden_dim
        self.n_e = self.emb_layers[0].n_d
        self.pad_id = self.emb_layers[0].vocab_map[PAD]
        self.dropout = theano.shared(
            np.float32(self.args.dropout).astype(theano.config.floatX))

        self._set_layers(args=self.args, n_d=self.n_d, n_e=self.n_e)

        ###########
        # Network #
        ###########
        h_in = self._input_layer(x=self.x)
        h = self._mid_layer(h_prev=h_in, x=self.x, pad_id=self.pad_id)
        y_scores = self._output_layer(h=h)
        self.y_pred = T.le(0.5, y_scores)

        #########################
        # Set an objective func #
        #########################
        self.set_params(layers=self.layers)
        self.loss = self.set_loss(self.y, y_scores)
        self.cost = self.set_cost(args=self.args,
                                  params=self.params,
                                  loss=self.loss)
Example #21
        def each_loss(outpt, inpt):
            # y is the label sequence after blanks have been inserted
            blank = 26
            y_nblank = T.neq(inpt, blank)
            n = T.dot(y_nblank, y_nblank)  # true label length (number of non-blank symbols)
            N = 2 * n + 1  # label length after blank padding, excluding the superfluous trailing padding
            labels = inpt[:N]
            labels2 = T.concatenate((labels, [blank, blank]))
            sec_diag = T.neq(labels2[:-2], labels2[2:]) * T.eq(labels2[1:-1], blank)
            recurrence_relation = \
                T.eye(N) + \
                T.eye(N, k=1) + \
                T.eye(N, k=2) * sec_diag.dimshuffle((0, 'x'))

            pred_y = outpt[:, labels]

            fwd_pbblts, _ = theano.scan(
                lambda curr, accum: T.switch(T.eq(curr*T.dot(accum, recurrence_relation), 0.0),
                                             T.dot(accum, recurrence_relation)
                                             , curr*T.dot(accum, recurrence_relation)),
                sequences=[pred_y],
                outputs_info=[T.eye(N)[0]]
            )
            #return fwd_pbblts
            #liklihood = fwd_pbblts[0, 0]
            liklihood = fwd_pbblts[-1, -1] + fwd_pbblts[-1, -2]
            #liklihood = T.switch(T.lt(liklihood, 1e-35), 1e-35, liklihood)
            #loss = -T.log(T.cast(liklihood, "float32"))
            #loss = 10 * (liklihood - 1) * (liklihood - 100)
            loss = (T.le(liklihood, 1.0)*(10*(liklihood-1)*(liklihood-100)))+(T.gt(liklihood, 1.0)*(-T.log(T.cast(liklihood, "float32"))))
            return loss
Example #22
    def compare_max(l2_norm, coding_dist):

        result,updates=theano.scan(lambda i,x:T.switch(T.le(x[i],T.constant(1e-12)),T.constant(1e-12),x[i]),\
           outputs_info=None,\
           sequences=T.arange(coding_dist.shape[0]),\
           non_sequences=[l2_norm])
        return result
Example #23
def ALB_softmax_health_weighting(o, t, o2, health, v, alpha_0, beta_0, alpha_1, beta_1, d, tau_p, tau_n, unchosen_p, b,
                                 tau_p_w, tau_n_w, decay_w):

    # Without variance weighting
    b = 1. / b  # Convert inverse temperature to temperature

    unchosen_0 = T.switch(T.le(v, 0.5), 1, unchosen_p)
    unchosen_1 = T.switch(T.gt(v, 0.5), 1, unchosen_p)

    health = T.switch(T.lt(health, 0), 0, health)
    tau_p = T.switch(T.ge(tau_p, 0), tau_p * (1 - tau_p_w * health), tau_p * (1 - (1 + tau_p_w * health)))
    tau_n = T.switch(T.ge(tau_n, 0), tau_n * (1 - tau_n_w * health), tau_n * (1 - (1 + tau_n_w * health)))
    d = T.switch(T.ge(tau_p, 0), d * (1 - decay_w * health), d * (1 - (1 + decay_w * health)))

    # Only update if outcome isn't missing
    alpha_0 = T.switch(T.ge(o, 0), (1 - d) * alpha_0 + (o * tau_p * unchosen_0), alpha_0)
    beta_0 = T.switch(T.ge(o, 0), (1 - d) * beta_0 + ((1 - o) * tau_n * unchosen_0), beta_0)

    alpha_1 = T.switch(T.ge(o2, 0), (1 - d) * alpha_1 + (o2 * tau_p * unchosen_1), alpha_1)
    beta_1 = T.switch(T.ge(o2, 0), (1 - d) * beta_1 + ((1 - o2) * tau_n * unchosen_1), beta_1)

    value_0 = alpha_0 / (alpha_0 + beta_0)
    value_1 = alpha_1 / (alpha_1 + beta_1)

    value = ((value_0 - value_1) + 1) / 2.

    var_0 = (alpha_0 * beta_0) / (T.pow(alpha_0 + beta_0, 2) * (alpha_0 + beta_0 + 1))
    var_1 = (alpha_1 * beta_1) / (T.pow(alpha_1 + beta_1, 2) * (alpha_1 + beta_1 + 1))

    value = np.exp(b * value) / (np.exp(b * value) + np.exp(b * (1 - value)))

    return (value, alpha_0, beta_0, alpha_1, beta_1, var_0, var_1, value_0, value_1, o, o2, unchosen_0, unchosen_1)
Example #24
def ALB_softmax(o, t, o2, v, alpha_0, beta_0, alpha_1, beta_1, d, tau_p, tau_n, unchosen_p, b):

    # Without variance weighting
    b = 1. / b  # Convert inverse temperature to temperature

    unchosen_0 = T.switch(T.le(v, 0.5), 1, unchosen_p)
    unchosen_1 = T.switch(T.gt(v, 0.5), 1, unchosen_p)

    # Only update if outcome isn't missing
    alpha_0 = T.switch(T.ge(o, 0), (1 - d) * alpha_0 + (o * tau_p * unchosen_0), alpha_0)
    beta_0 = T.switch(T.ge(o, 0), (1 - d) * beta_0 + ((1 - o) * tau_n * unchosen_0), beta_0)

    alpha_1 = T.switch(T.ge(o2, 0), (1 - d) * alpha_1 + (o2 * tau_p * unchosen_1), alpha_1)
    beta_1 = T.switch(T.ge(o2, 0), (1 - d) * beta_1 + ((1 - o2) * tau_n * unchosen_1), beta_1)

    value_0 = alpha_0 / (alpha_0 + beta_0)
    value_1 = alpha_1 / (alpha_1 + beta_1)

    value = ((value_0 - value_1) + 1) / 2.

    var_0 = (alpha_0 * beta_0) / (T.pow(alpha_0 + beta_0, 2) * (alpha_0 + beta_0 + 1))
    var_1 = (alpha_1 * beta_1) / (T.pow(alpha_1 + beta_1, 2) * (alpha_1 + beta_1 + 1))

    value = np.exp(b * value) / (np.exp(b * value) + np.exp(b * (1 - value)))

    return (value, alpha_0, beta_0, alpha_1, beta_1, var_0, var_1, value_0, value_1, o, o2, unchosen_0, unchosen_1)
Example #25
    def get_value(self, tau0):
        dt = self.delta
        ar, cr, a, b, c, d = self.term.coefficients

        # Format the lags correctly
        tau0 = tt.abs_(tau0)
        tau = tau0[..., None]

        # Precompute some factors
        dpt = dt + tau
        dmt = dt - tau

        # Real parts:
        # tau > Delta
        crd = cr * dt
        cosh = tt.cosh(crd)
        norm = 2 * ar / crd ** 2
        K_large = tt.sum(norm * (cosh - 1) * tt.exp(-cr * tau), axis=-1)

        # tau < Delta
        crdmt = cr * dmt
        K_small = K_large + tt.sum(norm * (crdmt - tt.sinh(crdmt)), axis=-1)

        # Complex part
        cd = c * dt
        dd = d * dt
        c2 = c ** 2
        d2 = d ** 2
        c2pd2 = c2 + d2
        C1 = a * (c2 - d2) + 2 * b * c * d
        C2 = b * (c2 - d2) - 2 * a * c * d
        norm = 1.0 / (dt * c2pd2) ** 2
        k0 = tt.exp(-c * tau)
        cdt = tt.cos(d * tau)
        sdt = tt.sin(d * tau)

        # For tau > Delta
        cos_term = 2 * (tt.cosh(cd) * tt.cos(dd) - 1)
        sin_term = 2 * (tt.sinh(cd) * tt.sin(dd))
        factor = k0 * norm
        K_large += tt.sum(
            (C1 * cos_term - C2 * sin_term) * factor * cdt, axis=-1
        )
        K_large += tt.sum(
            (C2 * cos_term + C1 * sin_term) * factor * sdt, axis=-1
        )

        # tau < Delta
        edmt = tt.exp(-c * dmt)
        edpt = tt.exp(-c * dpt)
        cos_term = (
            edmt * tt.cos(d * dmt) + edpt * tt.cos(d * dpt) - 2 * k0 * cdt
        )
        sin_term = (
            edmt * tt.sin(d * dmt) + edpt * tt.sin(d * dpt) - 2 * k0 * sdt
        )
        K_small += tt.sum(2 * (a * c + b * d) * c2pd2 * dmt * norm, axis=-1)
        K_small += tt.sum((C1 * cos_term + C2 * sin_term) * norm, axis=-1)

        return tt.switch(tt.le(tau0, dt), K_small, K_large)
Example #26
    def errors(self):
        """Return a float representing the number of errors in the minibatch
        over the total number of examples of the minibatch; zero-one
        loss over the size of the minibatch.

        :type y: theano.tensor.TensorType
        :param y: corresponds to a vector that gives for each example the
                  correct label
        """

        # check if y has same dimension of y_pred
        if self.correct_output.ndim != self.predicted_output.ndim:
            raise TypeError('y should have the same shape as self.y_pred',
                            ('correct_output', self.correct_output.type,
                             'predicted_output', self.predicted_output.type))
        # check if y is of the correct datatype
        if self.correct_output.dtype.startswith('int'):
            # the T.neq operator returns a vector of 0s and 1s, where 1
            # represents a mistake in prediction
            return T.mean(T.neq(self.predicted_output, self.correct_output))
        elif self.correct_output.dtype.startswith('float'):
            # First compare the equality of the data the same way numpy.allclose
            # does, then counts the failures.
            return T.sum(
                T.neq(
                    0,
                    T.sum(T.le(self.predicted_output - self.correct_output,
                               1e-05 + 1e-08 * T.abs_(self.correct_output)),
                          axis=1)))
        else:
            raise NotImplementedError()
Example #27
    def compile(self):
        # 1D: n_words, 2D: batch * n_cands
        self.x = T.imatrix()
        self.y = T.fvector()
        self.train_inputs = [self.x, self.y]
        self.pred_inputs = [self.x]

        self.activation = self.args.activation
        self.n_d = self.args.hidden_dim
        self.n_e = self.emb_layers[0].n_d
        self.pad_id = self.emb_layers[0].vocab_map[PAD]
        self.dropout = theano.shared(np.float32(self.args.dropout).astype(theano.config.floatX))

        self._set_layers(args=self.args, n_d=self.n_d, n_e=self.n_e)

        ###########
        # Network #
        ###########
        h_in = self._input_layer(x=self.x)
        h = self._mid_layer(h_prev=h_in, x=self.x, pad_id=self.pad_id)
        y_scores = self._output_layer(h=h)
        self.y_pred = T.le(0.5, y_scores)

        #########################
        # Set an objective func #
        #########################
        self.set_params(layers=self.layers)
        self.loss = self.set_loss(self.y, y_scores)
        self.cost = self.set_cost(args=self.args, params=self.params, loss=self.loss)
Example #28
def gate_layer(tparams, X_word, X_char, options, prefix, pretrain_mode, activ='lambda x: x', **kwargs):
    """ 
    compute the forward pass for a gate layer

    Parameters
    ----------
    tparams        : OrderedDict of theano shared variables, {parameter name: value}
    X_word         : theano 3d tensor, word input, dimensions: (num of time steps, batch size, dim of vector)
    X_char         : theano 3d tensor, char input, dimensions: (num of time steps, batch size, dim of vector)
    options        : dictionary, {hyperparameter: value}
    prefix         : string, layer name
    pretrain_mode  : theano shared scalar, 0. = word only, 1. = char only, 2. = word & char
    activ          : string, activation function: 'linear', 'tanh', or 'rectifier'

    Returns
    -------
    X              : theano 3d tensor, final vector, dimensions: (num of time steps, batch size, dim of vector)

    """      
    # compute gating values, Eq.(3)
    G = tensor.nnet.sigmoid(tensor.dot(X_word, tparams[p_name(prefix, 'v')]) + tparams[p_name(prefix, 'b')][0])
    X = ifelse(tensor.le(pretrain_mode, numpy.float32(1.)),  
               ifelse(tensor.eq(pretrain_mode, numpy.float32(0.)), X_word, X_char),
               G[:, :, None] * X_char + (1. - G)[:, :, None] * X_word)   
    return eval(activ)(X)
Example #29
def RMSprop_v1(tparams, cost, inps, lr, rho=0.9, epsilon=1e-6, cutoff=1e10):
    """ default: lr=0.001 
        This is the implementation of the RMSprop algorithm used in
        http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf.
    """

    grads = tensor.grad(cost, tparams.values())
    norm = tensor.sqrt(sum([tensor.sum(g**2) for g in grads]))
    # rescale the gradients when their global norm exceeds 5; a Python `if` on a
    # symbolic comparison cannot branch at graph-build time, so use a symbolic switch
    grads = [tensor.switch(tensor.ge(norm, 5), g * 5 / norm, g) for g in grads]

    gshared = [
        theano.shared(p.get_value() * 0., name='%s_grad' % k)
        for k, p in tparams.iteritems()
    ]
    gsup = [(gs, g) for gs, g in zip(gshared, grads)]
    f_grad_shared = theano.function(inps, cost, updates=gsup)

    updates = []

    for p, g in zip(tparams.values(), gshared):
        acc = theano.shared(p.get_value() * 0.)
        acc_new = rho * acc + (1 - rho) * g**2
        updates.append((acc, acc_new))

        updated_p = p - lr * (g / tensor.sqrt(acc_new + epsilon))
        updated_p = tensor.switch(tensor.ge(updated_p, cutoff), cutoff,
                                  updated_p)
        updated_p = tensor.switch(tensor.le(updated_p, -cutoff), -cutoff,
                                  updated_p)
        updates.append((p, updated_p))

    f_update = theano.function([lr], [], updates=updates)

    return f_grad_shared, f_update
Example #30
def tied_neighbours(preds, n_sample_preds, n_classes):
    eps = 1e-8
    #preds = T.clip(preds, eps, 1-eps)
    preds_per_trial_row = preds.reshape((-1, n_sample_preds, n_classes))
    earlier_neighbours = preds_per_trial_row[:,:-1]
    later_neighbours = preds_per_trial_row[:,1:]
    # Have to now ensure first values are larger zero
    # for numerical stability :/
    # Example of problem otherwise:
    """
    a = T.fmatrix()
    b = T.fmatrix()
    soft_out_a =softmax(a)
    soft_out_b =softmax(b)
    
    loss = categorical_crossentropy(soft_out_a[:,1:],soft_out_b[:,:-1])
    neigh_fn = theano.function([a,b], loss)
    
    neigh_fn(np.array([[0,1000,0]], dtype=np.float32), 
        np.array([[0.1,0.9,0.3]], dtype=np.float32))
    -> inf
    """
    
    # renormalize(?)
    
    earlier_neighbours = (T.gt(earlier_neighbours, eps) * earlier_neighbours + 
        T.le(earlier_neighbours, eps) * earlier_neighbours + eps)
    loss = categorical_crossentropy(earlier_neighbours, later_neighbours)
    return loss
Example #31
    def logp(self, value):
        p_ = self.p
        k = self.k

        # Clip values before using them for indexing
        value_clip = tt.clip(value, 0, k - 1)

        # We must only check that the values sum to 1 if p comes from a
        # tensor variable, i.e. when p is a step_method proposal. In the other
        # cases we normalize ourselves
        if not isinstance(p_, (numbers.Number, np.ndarray, tt.TensorConstant,
                               tt.sharedvar.SharedVariable)):
            sumto1 = theano.gradient.zero_grad(
                tt.le(abs(tt.sum(p_, axis=-1) - 1), 1e-5))
            p = p_
        else:
            p = p_ / tt.sum(p_, axis=-1, keepdims=True)
            sumto1 = True

        if p.ndim > 1:
            a = tt.log(np.moveaxis(p, -1, 0)[value_clip])
        else:
            a = tt.log(p[value_clip])

        return bound(a, value >= 0, value <= (k - 1), sumto1,
                     tt.all(p_ > 0, axis=-1), tt.all(p <= 1, axis=-1))
Example #32
def ALB_var(o, t, o2, v, alpha_0, beta_0, alpha_1, beta_1, d, tau_p,
                                     tau_n, unchosen_p, b, var_weight):

    unchosen_0 = T.switch(T.le(v, 0.5), 1, unchosen_p)
    unchosen_1 = T.switch(T.gt(v, 0.5), 1, unchosen_p)

    # Only update if outcome isn't missing
    alpha_0 = T.switch(T.ge(o, 0), (1 - d) * alpha_0 + (o * tau_p * unchosen_0), alpha_0)
    beta_0 = T.switch(T.ge(o, 0), (1 - d) * beta_0 + ((1 - o) * tau_n * unchosen_0), beta_0)

    alpha_1 = T.switch(T.ge(o2, 0), (1 - d) * alpha_1 + (o2 * tau_p * unchosen_1), alpha_1)
    beta_1 = T.switch(T.ge(o2, 0), (1 - d) * beta_1 + ((1 - o2) * tau_n * unchosen_1), beta_1)

    value_0 = alpha_0 / (alpha_0 + beta_0)
    value_1 = alpha_1 / (alpha_1 + beta_1)

    var_0 = (alpha_0 * beta_0) / (T.pow(alpha_0 + beta_0, 2) * (alpha_0 + beta_0 + 1))
    var_1 = (alpha_1 * beta_1) / (T.pow(alpha_1 + beta_1, 2) * (alpha_1 + beta_1 + 1))

    # Weighting by variance
    variance_bias = var_0 / (var_0 + var_1)

    w_value_0 = value_0 * (1 - (variance_bias * var_weight))
    w_value_1 = value_1 * (1 - (1 - variance_bias) * var_weight)

    value = ((w_value_0 - w_value_1) + 1) / 2.

    # Softmax
    value = np.exp(b * value) / (np.exp(b * value) + np.exp(b * (1 - value)))

    return (value, alpha_0, beta_0, alpha_1, beta_1, o, o2, var_0, var_1, value_0, value_1, unchosen_0, unchosen_1, variance_bias, w_value_0, w_value_1)
Example #33
    def value(self, tau0):
        dt = self.delta
        ar, cr, a, b, c, d = self.term.coefficients

        # Format the lags correctly
        tau0 = tt.abs_(tau0)
        tau = tt.reshape(tau0,
                         tt.concatenate([tau0.shape, [1]]),
                         ndim=tau0.ndim + 1)

        # Precompute some factors
        dpt = dt + tau
        dmt = dt - tau

        # Real parts:
        # tau > Delta
        crd = cr * dt
        norm = 1.0 / (crd)**2
        factor = (tt.exp(crd) + tt.exp(-crd) - 2) * norm
        K_large = tt.sum(ar * tt.exp(-cr * tau) * factor, axis=-1)

        # tau < Delta
        K_small = tt.sum((2 * cr * (dmt) + tt.exp(-cr * dmt) +
                          tt.exp(-cr * dpt) - 2 * tt.exp(-cr * tau)) * norm,
                         axis=-1)

        # Complex part
        cd = c * dt
        dd = d * dt
        c2 = c**2
        d2 = d**2
        c2pd2 = c2 + d2
        C1 = a * (c2 - d2) + 2 * b * c * d
        C2 = b * (c2 - d2) - 2 * a * c * d
        norm = 1.0 / (dt * c2pd2)**2
        k0 = tt.exp(-c * tau)
        cdt = tt.cos(d * tau)
        sdt = tt.sin(d * tau)

        # For tau > Delta
        cos_term = 2 * (tt.cosh(cd) * tt.cos(dd) - 1)
        sin_term = 2 * (tt.sinh(cd) * tt.sin(dd))
        factor = k0 * norm
        K_large += tt.sum((C1 * cos_term - C2 * sin_term) * factor * cdt,
                          axis=-1)
        K_large += tt.sum((C2 * cos_term + C1 * sin_term) * factor * sdt,
                          axis=-1)

        # Real part
        edmt = tt.exp(-c * dmt)
        edpt = tt.exp(-c * dpt)
        cos_term = edmt * tt.cos(d * dmt) + edpt * tt.cos(
            d * dpt) - 2 * k0 * cdt
        sin_term = edmt * tt.sin(d * dmt) + edpt * tt.sin(
            d * dpt) - 2 * k0 * sdt
        K_small += tt.sum(2 * (a * c + b * d) * c2pd2 * dmt * norm, axis=-1)
        K_small += tt.sum((C1 * cos_term + C2 * sin_term) * norm, axis=-1)

        return tt.switch(tt.le(tau0, dt), K_small, K_large)
Example #34
    def logp(self, value):
        p = self.p
        k = self.k

        sumto1 = theano.gradient.zero_grad(T.le(abs(T.sum(p) - 1), 1e-5))
        return bound(T.log(p[value]),
                     value >= 0, value <= (k - 1),
                     sumto1)
Example #35
def huber_loss(y_true, y_pred):
    max_delta = 1.0
    error = y_true - y_pred
    abs_error = np.abs(error)
    loss1 = 0.5 * T.square(error)
    loss2 = max_delta * abs_error - 0.5 * T.square(max_delta)
    loss = T.switch(T.le(abs_error, max_delta), loss1, loss2)
    return T.mean(loss)
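A quick self-contained check of the same T.le/T.switch thresholding (a sketch assuming Theano; it rebuilds the graph with T.abs_ instead of calling the function above, and picks one residual per branch):

import numpy as np
import theano
import theano.tensor as T

y_true = T.fvector('y_true')
y_pred = T.fvector('y_pred')
max_delta = 1.0
abs_error = T.abs_(y_true - y_pred)
loss = T.switch(T.le(abs_error, max_delta),
                0.5 * T.sqr(abs_error),                          # quadratic branch
                max_delta * abs_error - 0.5 * T.sqr(max_delta))  # linear branch
loss_fn = theano.function([y_true, y_pred], T.mean(loss))

# |error| = 0.5 -> 0.125 (quadratic), |error| = 3.0 -> 2.5 (linear); mean = 1.3125
print(loss_fn(np.array([0.0, 0.0], dtype=np.float32),
              np.array([0.5, 3.0], dtype=np.float32)))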
Example #36
    def upwind(dip_ind, str_ind, StartTimes, slownesses, patch_size):
        [n_patch_dip, n_patch_str] = slownesses.shape
        zero = theano.shared(0)
        s1 = str_ind - 1
        d1 = dip_ind - 1
        s2 = str_ind + 1
        d2 = dip_ind + 1

        # if a < b return b
        checked_s1 = ifelse(tt.lt(s1, zero), zero, s1)
        checked_d1 = ifelse(tt.lt(d1, zero), zero, d1)

        # if a <= b return a - 1
        checked_s2 = ifelse(tt.le(n_patch_str, s2), n_patch_str - 1, s2)
        checked_d2 = ifelse(tt.le(n_patch_dip, d2), n_patch_dip - 1, d2)

        ST_xmin = tt.min(
            (StartTimes[checked_d1, str_ind], StartTimes[checked_d2, str_ind]))
        ST_ymin = tt.min(
            (StartTimes[dip_ind, checked_s1], StartTimes[dip_ind, checked_s2]))

        ### Eikonal equation solver ###
        # The unique solution to the equation
        # [(x-a)^+]^2 + [(x-b)^+]^2 = f^2 * h^2
        # where a = u_xmin, b = u_ymin, is
        #
        #         | min(a,b) + f*h,                           |a-b|>= f*h
        # xnew =  |
        #         |0.5 * [ a+b+sqrt( 2*f^2*h^2 - (a-b)^2 ) ], |a-b| < f*h
        start_new = ifelse(
            tt.le(slownesses[dip_ind, str_ind] * patch_size,
                tt.abs_(ST_xmin - ST_ymin)),
            tt.min((ST_xmin, ST_ymin)) + slownesses[dip_ind, str_ind] * \
                patch_size,
            (ST_xmin + ST_ymin + \
                tt.sqrt(2 * tt.pow(slownesses[dip_ind, str_ind], 2) * \
                                tt.pow(patch_size, 2) - \
                                tt.pow((ST_xmin - ST_ymin), 2)
                       )) / 2
                          )

        # if a < b return a
        output = ifelse(tt.lt(start_new, StartTimes[dip_ind, str_ind]),
                        start_new, StartTimes[dip_ind, str_ind])
        return tt.set_subtensor(
            StartTimes[dip_ind:dip_ind + 1, str_ind:str_ind + 1], output)
Example #37
 def adapt_step(dt, accept_prob, pos, mom, energy, energy_grad,
                k_energy):
     dt = tt.switch(tt.gt(accept_prob**sign, 2.**(-sign)),
                    (2.**sign) * dt, dt)
     accept_prob = leapfrog_accept_prob(dt, pos, mom, energy,
                                        energy_grad, k_energy)
     return (dt, accept_prob), th.scan_module.until(
         tt.all(tt.le(accept_prob**sign, 2.**(-sign))))
Example #38
def objective(y_true, y_pred, P, Q, alpha=0., beta=0.15, dbeta=0., gamma=0.01, gamma1=-1., poos=0.23, eps=1e-6):
    '''Expects a binary class matrix instead of a vector of scalar classes.
    '''

    beta = np.float32(beta)
    dbeta = np.float32(dbeta)
    gamma = np.float32(gamma)
    poos = np.float32(poos)
    eps = np.float32(eps)

    # scale preds so that the class probas of each sample sum to 1
    y_pred += eps
    y_pred /= y_pred.sum(axis=-1, keepdims=True)

    y_true = T.cast(y_true.flatten(), 'int64')
    y1 = T.and_(T.gt(y_true, 0), T.le(y_true, Q))  # in-set
    y0 = T.or_(T.eq(y_true, 0), T.gt(y_true, Q))  # out-of-set or unlabeled
    y0sum = y0.sum() + eps  # number of oos
    y1sum = y1.sum() + eps  # number of in-set
    # we want to reduce the cross-entropy of labeled data
    # convert all oos/unlabeled to label=0
    cost0 = T.nnet.categorical_crossentropy(y_pred, T.switch(y_true <= Q, y_true, 0))
    cost0 = T.dot(y1, cost0) / y1sum  # average cost per labeled example

    if alpha:
        cost1 = T.nnet.categorical_crossentropy(y_pred, y_pred)
        cost1 = T.dot(y0, cost1) / y0sum  # average cost per labeled example
        cost0 += alpha*cost1

    # we want to increase the average entropy in each batch
    # average over batch
    if beta:
        y_pred_avg0 = T.dot(y0, y_pred) / y0sum
        y_pred_avg0 = T.clip(y_pred_avg0, eps, np.float32(1) - eps)
        y_pred_avg0 /= y_pred_avg0.sum(axis=-1, keepdims=True)
        cost2 = T.nnet.categorical_crossentropy(y_pred_avg0.reshape((1,-1)), P-dbeta)[0] # [None,:]
        cost2 = T.switch(y0sum > 0.5, cost2, 0.)  # ignore cost2 if no samples
        cost0 += beta*cost2

    # binary classifier score
    if gamma:
        y_pred0 = T.clip(y_pred[:,0], eps, np.float32(1) - eps)
        if gamma1 < 0.:
            cost3 = - T.dot(poos*y0,T.log(y_pred0)) - T.dot(np.float32(1)-poos*y0.T,T.log(np.float32(1)-y_pred0))
            cost3 /= y_pred.shape[0]
            cost0 += gamma*cost3
        elif gamma1 > 0.:
            cost3 = - T.dot(poos*y0,T.log(y_pred0)) - T.dot((np.float32(1)-poos)*y0,T.log(np.float32(1)-y_pred0))
            cost3 /= y0sum
            cost31 = - T.dot(y1, T.log(np.float32(1) - y_pred0))
            cost31 /= y1sum
            cost0 += gamma*cost3 + gamma1*cost31
        else:  # gamma1 == 0.
            cost3 = - T.dot(poos*y0,T.log(y_pred0)) - T.dot((np.float32(1)-poos)*y0, T.log(np.float32(1)-y_pred0))
            cost3 /= y0sum
            cost0 += gamma*cost3
    return cost0
Example #39
    def apply(self, y, y_hat):
        epsilon    = 1e-5 # to avoid nan
        mask = (tensor.le(srng.uniform(size=y[:,-1:].shape, dtype=config.floatX), .0005))*1.
        cost = 0
        for i in range(15):
            cost +=  tensor.nnet.binary_crossentropy(y_hat[:,i,:,:], y[:,i,:,:]).mean()

        cost += ( tensor.nnet.binary_crossentropy(y_hat[:,15,:,:], tensor.eq(y[:,15,:,:],1)*1.) * mask ).mean()
        return cost
Example #40
 def cost(self,Y,Y_hat):
     r = self.r
     w = 0.05  # threshold at or below which targets are rescaled by r
     i = T.le(Y,w)
     j = T.eq(i,0)
     z = T.join(0,Y[i]/r,Y[j])
     z_hat = T.join(0,Y_hat[i]/r,Y_hat[j])
     return super(linear_mlp_bayesian_cost,self).cost(z,z_hat)
Example #41
def huber_loss(y_true, y_pred, delta=1., axis=None):
    a = y_true - y_pred
    squared_loss = 0.5*T.sqr(a)
    absolute_loss = (delta*abs(a) - 0.5*T.sqr(delta))

    cost = T.switch(T.le(abs(a), delta),
                    squared_loss,
                    absolute_loss)
    return cost.mean(axis=axis)
Example #42
def clip_gradients(gparams, threshold=5.):
    clipped_gparams = []
    for gparam in gparams:
        norm_gparam = T.sqrt(T.sqr(gparam).sum())
        clipped_gparams.append(T.switch(T.le(norm_gparam, threshold),
                                        gparam,
                                        (gparam/norm_gparam)*threshold))

    return clipped_gparams
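An illustrative sketch of how clip_gradients slots into a training step (assumes Theano and that clip_gradients above is in scope with T bound to theano.tensor; the toy cost, parameter and learning rate are made up for the example):

import numpy as np
import theano
import theano.tensor as T

w = theano.shared(np.ones(3, dtype=theano.config.floatX), name='w')
x = T.vector('x')
cost = T.sum((w * x) ** 2)
gparams = T.grad(cost, [w])
# gradients whose L2 norm exceeds the threshold are rescaled to that norm
clipped = clip_gradients(gparams, threshold=5.)
updates = [(p, p - 0.1 * g) for p, g in zip([w], clipped)]
step = theano.function([x], cost, updates=updates)
step(np.array([10., 10., 10.], dtype=theano.config.floatX))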
Example #43
def theano_symbolic_dtw(x1, x2, x1_lengths, x2_lengths, distance_function=cosine, normalize=True, debug_level=None,
                        eps=None):
    """
    A symbolic implementation of DTW that supports batches of sequence pairs.

    Returns a scalar if ndim == 2 and a vector of size x1.shape[1] if ndim == 3

    This is slow! About 90 times slower than the Cython implementation using the parameters below.

    :param x1: A tensor containing the first side of the sequence pairs to be aligned.
    :param x2: A tensor containing the second side of the sequence pairs to be aligned.
    :param x1_lengths: An integer vector identifying the lengths of the sequences in x1
    :param x2_lengths: An integer vector identifying the lengths of the sequences in x2
    :param distance_function: The symbolic distance function to use (e.g. a reference to a function in
                              distance).
    :param normalize: Whether the DTW distances should be sequence length normalized.
    :param debug_level: The debug level to use (see above for explanation).
    :param eps: The minimum value to use inside the distance function. Set to the machine epsilon if None.
    :return: The DTW distances for every sequence pair in the batch.
    """

    if eps is None:
        eps = numpy.dtype(theano.config.floatX).type(numpy.finfo(float).eps)

    assert 0 <= x1_lengths.ndim == x2_lengths.ndim <= 1
    assert isinstance(normalize, bool)

    ndim = x1.ndim
    assert 2 <= ndim == x2.ndim <= 3

    # Ensure x2 is the shorter input to minimize the number of scan iterations
    x1_shorter_than_x2 = tt.le(x1.shape[0], x2.shape[0])
    x1, x2 = _swap(x1_shorter_than_x2, x1, x2, 'x1', 'x2', debug_level)
    x1_lengths, x2_lengths = _swap(x1_shorter_than_x2, x1_lengths, x2_lengths, 'x1_lengths', 'x2_lengths', debug_level)

    # Compute distances between x1 sequences and paired x2 sequences
    d = distance_function(x1, x2, eps)

    # Iterate over the temporal slices of x2. See dtw_outer_step for an explanation of the other inputs to this scan
    # operation
    x1_indexes = tt.arange(x1.shape[0], dtype=DTYPE_INT64)
    results, _ = theano.scan(_create_dtw_outer_step(distance_function, debug_level), sequences=[x1_indexes, d],
                             outputs_info=[
                                 tt.zeros_like(x2[:, :, 0] if x2.ndim == 3 else x2[:, 0], dtype=theano.config.floatX)],
                             non_sequences=[x1_lengths, x2_lengths])
    result = results[x1_lengths - 1, x2_lengths - 1, tt.arange(x1.shape[1])] if x2.ndim == 3 else results[
        x1_lengths - 1, x2_lengths - 1]
    result = _debug(result, 'theano_symbolic_dtw.result', debug_level)
    assert result.ndim == x1_lengths.ndim

    # Length normalize the distances if requested to do so
    if normalize:
        result = _debug(result / tt.cast(x1_lengths + x2_lengths, dtype=utility.get_standard_dtype()),
                        'theano_symbolic_dtw.norm_result', debug_level)

    return result
Example #44
 def output_index(self):
   from theano.ifelse import ifelse
   index = self.index
   if self.sources:
     # In some cases, e.g. forwarding, the target index (for "classes") might have shape[0]==0.
     # Or shape[0]==1 with index[0]==0. See Dataset.shapes_for_batches().
     # Use source index in that case.
     have_zero = T.le(index.shape[0], 1) * T.eq(T.sum(index[0]), 0)
     index = ifelse(have_zero, T.cast(self.sources[0].index,'int8'), T.cast(index,'int8'))
   return index
Example #45
    def logp(self, x):
        n = self.n
        p = self.p

        X = x[self.tri_index]
        X = t.fill_diagonal(X, 1)

        result = self._normalizing_constant(n, p)
        result += (n - 1.0) * log(det(X))
        return bound(result, n > 0, all(le(X, 1)), all(ge(X, -1)))
Example #46
    def logp(self, value):
        p = self.p
        k = self.k

        sumto1 = theano.gradient.zero_grad(tt.le(abs(tt.sum(p, axis=-1) - 1), 1e-5))
        if p.ndim > 1:
            a = tt.log(p[tt.arange(p.shape[0]), value])
        else:
            a = tt.log(p[value])
        return bound(a, value >= 0, value <= (k - 1), sumto1)
Example #47
def innerL_(sS, i):
    Ei = calcEk_(sS, i)
    
    # use "+" instead of "or" and "*" instead of "and"
    checkUselessAlpha1 = T.ge(sS.labels[i] * Ei, -sS.tol) + T.ge(sS.alphas[i], sS.C)
    checkUselessAlpha2 = T.le(sS.labels[i]*Ei, sS.tol) + T.lt(sS.alphas[i], 0)
    isUselessAlpha = toTheanoBool(checkUselessAlpha1 * checkUselessAlpha2)
    
    updateL = innerL_alphaInRange_(sS, i, Ei)
    earlyret = sS.retlist(0)
    return ifelse(isUselessAlpha, earlyret, updateL)
Example #48
 def loss_confident_bootstrapping(self, y, factor=1):
     #Customized categorical cross entropy.
     #Based on the multibox impl. More tuned to paper. More strict
     p = self.output
     #Only confident predictions are included. Everything between 0.2 and 0.8 is disregarded. 60% of the range.
     hardUpper = T.gt(p, 0.8)
     hardLower = T.le(p, 0.2)
     loss = (
         - T.sum( ((factor * y) + ((1.0- factor) * hardUpper)) * T.log(p) ) -
         T.sum( ((factor * (1.0 - y)) + ((1.0- factor) * hardLower)) * T.log(1.0 - p) )
     )
     return loss/self.size
Example #49
def normalizeAngle(theta):
    # Wrap theta into (-pi, pi]. Python `if`/`elif` on symbolic comparisons such
    # as T.gt or T.le cannot branch at graph-build time, so the whole computation
    # is expressed with T.switch instead.
    twopi = 2 * np.pi
    wrapped = theta - T.floor(theta / twopi) * twopi        # now in [0, 2*pi)
    wrapped = T.switch(T.ge(wrapped, np.pi), wrapped - twopi, wrapped)
    wrapped = T.switch(T.le(wrapped, -np.pi), wrapped + twopi, wrapped)
    in_range = T.and_(T.gt(theta, -np.pi), T.lt(theta, np.pi))
    return T.switch(in_range, theta, wrapped)
Example #50
  def cubicBSpline(self, L):
    b = T.zeros_like(L)

    idx4 = T.ge(L, 0) * T.lt(L, 1)
    idx3 = T.ge(L, 1) * T.lt(L, 2)
    idx2 = T.ge(L, 2) * T.lt(L, 3)
    idx1 = T.ge(L, 3) * T.le(L, 4)

    b = T.switch(T.eq(idx4, 1), T.pow(L, 3) / 6, b)
    b = T.switch(T.eq(idx3, 1), (-3*T.pow(L-1,3) + 3*T.pow(L-1,2) + 3*(L-1) + 1) / 6, b)
    b = T.switch(T.eq(idx2, 1), ( 3*T.pow(L-2,3) - 6*T.pow(L-2,2)           + 4) / 6, b)
    b = T.switch(T.eq(idx1, 1), (-  T.pow(L-3,3) + 3*T.pow(L-3,2) - 3*(L-3) + 1) / 6, b)
    
    return b.T # b is K x K' and thus, as we multiply from the right with
Example #51
 def get_train_func(self, learning_rate, nce=True, em=False):
   print >>sys.stderr, "Trainining type: EM = %s, NCE = %s"%(em, nce)
   # TODO: Implement AdaGrad
   x, y_s = T.ivector("x"), T.imatrix("y_s")
   if em:
     cost = -self.get_sym_nc_complete_expectation(x, y_s) if nce else -self.get_sym_complete_expectation(x, y_s)
   else:
     cost = -T.log(self.get_sym_nc_direct_prob(x, y_s)) if nce else -T.log(self.get_sym_direct_prob(x, y_s))
   params = self.repr_params + self.enc_params + self.rec_params
   g_params = T.grad(cost, params)
   # Updating the parameters only if the norm of the gradient is less than 100.
   # Important: This check also takes care of any element in the gradients being nan. The conditional returns False even in that case.
   updates=[ (p, ifelse(T.le(T.nlinalg.norm(g, None), T.constant(100.0, dtype='float64')), p - learning_rate * g, p)) for p, g in zip(params, g_params) ]
   train_func = theano.function([x, y_s], cost, updates=updates)
   return train_func
Example #52
def alpha_huber(y_true, y_pred):
    """ sets the epislon in huber loss equal to a percentile of the residuals
    """
    # abs_r = T.abs_(y_pred - y_true)
    # loss = 0.5 * T.sqr(abs_r)
    # epsilon = np.percentile(loss, alpha * 100)
    # idx = abs_r <= epsilon
    # loss[idx] = epsilon * abs_r[idx] - 0.5 * T.sqr(epsilon)
    #switch(cond, ift, iff)
    alpha=0.95
    abs_r = T.abs_(y_pred - y_true)
    epsilon = np.percentile(0.5 * T.sqr(abs_r), alpha * 100)
    loss =T.switch(T.le(abs_r,epsilon),epsilon * abs_r - 0.5 * T.sqr(epsilon),0.5 * T.sqr(abs_r))

    return loss
Example #53
    def get_gradients(self, model, data, **kwargs):
        space, sources = self.get_data_specs(model)
        space.validate(data)
        assert isinstance(model, CompressAdversaryPair)
        g = model.compressor
        d = model.discriminator

        #get raw gradients for d and g objectives...
        d_obj, g_obj = self.get_objectives(model, data)
        g_params = g.get_params()
        d_params = d.get_params()
        for param in g_params:
            assert param not in d_params
        for param in d_params:
            assert param not in g_params
        
        d_grads = T.grad(d_obj, d_params)
        g_grads = T.grad(g_obj, g_params)

        # if self.scale_grads:
        #     S_grad = T.grad(g_obj, S)
        #     scale = T.maximum(1., self.target_scale / T.sqrt(T.sqr(S_grad).sum()))
        #     g_grads = [g_grad * scale for g_grad in g_grads]

        #adjust raw gradients with control signals
        rval = OrderedDict()
        zeros = itertools.repeat(theano.tensor.constant(0., dtype='float32'))

        if self.ever_train_discriminator:
            rval.update(OrderedDict(safe_zip(d_params, [self.now_train_discriminator * dg for dg in d_grads])))
        else:
            rval.update(OrderedDict(zip(d_params, zeros)))

        if self.ever_train_compressor:
            rval.update(OrderedDict(safe_zip(g_params, [self.now_train_compressor * gg for gg in g_grads])))
        else:
            rval.update(OrderedDict(zip(g_params, zeros)))

        #update control signals using the updates return functionality
        updates = OrderedDict()
        #first, the clock
        self.future_train_clock = T.switch(T.ge(self.train_clock,self.discriminator_steps+self.joint_steps+self.compressor_steps),1.,self.train_clock+1.)
        updates[self.train_clock] = self.future_train_clock
        #then the control signals
        updates[self.now_train_discriminator] = T.switch(T.le(self.future_train_clock,self.discriminator_steps+self.joint_steps),1.,0.)
        updates[self.now_train_compressor] = T.switch(T.gt(self.future_train_clock,self.discriminator_steps),1.,0.)

        return rval, updates
Example #54
def frank(u,v,d,cut=25):
    '''
    Frank Copula
    '''
    d = (TT.nnet.sigmoid(d)-0.5)*cut
    U = TT.exp(-d*u)-1
    V = TT.exp(-d*v)-1
    D = TT.exp(-d  )-1
    C = 1+U*V/D

    idx = TT.le(C,0).nonzero()
    C = TT.set_subtensor(C[idx],0)

    C = -1/(d) * TT.log(C)

    return C
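The clamp in the middle of frank() is a common idiom: TT.le(...).nonzero() picks out the offending indices and TT.set_subtensor overwrites them, so the later TT.log never sees a non-positive argument. A minimal sketch of just that idiom (assuming Theano):

import numpy as np
import theano
import theano.tensor as TT

C = TT.fvector('C')
idx = TT.le(C, 0).nonzero()          # indices where C <= 0
C_clamped = TT.set_subtensor(C[idx], 0)
f = theano.function([C], C_clamped)
print(f(np.array([-0.5, 0.2, 0.0, 1.3], dtype=np.float32)))  # [0. 0.2 0. 1.3]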
Example #55
    def logp(self, value):
        p = self.p
        k = self.k

        # Clip values before using them for indexing
        value_clip = tt.clip(value, 0, k - 1)

        sumto1 = theano.gradient.zero_grad(
            tt.le(abs(tt.sum(p, axis=-1) - 1), 1e-5))

        if p.ndim > 1:
            a = tt.log(p[tt.arange(p.shape[0]), value_clip])
        else:
            a = tt.log(p[value_clip])

        return bound(a, value >= 0, value <= (k - 1), sumto1)
Example #56
 def initialize(self):
   assert self.loss in (
     'ctc', 'ce_ctc', 'hmm', 'ctc2', 'sprint', 'viterbi', 'fast_bw', 'warp_ctc'), 'invalid loss: ' + self.loss
   self.y_m = T.reshape(self.z, (self.z.shape[0] * self.z.shape[1], self.z.shape[2]), ndim=2)
   if not self.attrs.get("apply_softmax", True):
     self.p_y_given_x_flat = self.y_m
     self.p_y_given_x = self.z
     self.z = T.log(self.z)
     self.y_m = T.log(self.y_m)
   elif self.attrs.get("gauss_outputs", False):
     self.y_m = -T.sqr(self.y_m)
     self.p_y_given_x_flat = T.exp(self.y_m)
     self.p_y_given_x = T.reshape(self.p_y_given_x_flat, self.z.shape)
   else:  # standard case
     self.p_y_given_x_flat = T.nnet.softmax(self.y_m)
     self.p_y_given_x = T.reshape(T.nnet.softmax(self.y_m), self.z.shape)
   self.y_pred = T.argmax(self.p_y_given_x_flat, axis=-1)
   self.output = self.p_y_given_x
   if self.attrs.get('compute_priors', False):
     exp_average = self.attrs.get("compute_priors_exp_average", 0)
     custom = T.mean(self.p_y_given_x_flat[self.i], axis=0)
     custom_init = numpy.ones((self.attrs['n_out'],), 'float32') / numpy.float32(self.attrs['n_out'])
     if self.attrs.get('use_label_priors', 0) > 0:  # use labels to compute priors in first epoch
       custom_0 = T.mean(theano.tensor.extra_ops.to_one_hot(self.y_data_flat[self.i], self.attrs['n_out'], 'float32'),
                         axis=0)
       custom = T.switch(T.le(self.network.epoch, self.attrs.get('use_label_priors', 0)), custom_0, custom)
       custom_init = numpy.zeros((self.attrs['n_out'],), 'float32')
     self.priors = self.add_param(theano.shared(custom_init, 'priors'), 'priors',
                                  custom_update=custom,
                                  custom_update_normalized=not exp_average,
                                  custom_update_exp_average=exp_average)
     self.log_prior = T.log(T.maximum(self.priors, numpy.float32(1e-20)))
   self._maybe_substract_prior_from_output()
   if self.attrs.get('compute_distortions', False):
     p = self.p_y_given_x_flat[self.i]
     momentum = p[:-1] * p[1:]
     momentum = T.sum(momentum,axis=-1)
     loop = T.mean(momentum)
     forward = numpy.float32(1) - loop
     self.distortions = {
       'loop' : self.add_param(theano.shared(numpy.ones(1,) * numpy.float32(0.5), 'loop'), 'loop',
                               custom_update = loop,
                               custom_update_normalized=True),
       'forward' : self.add_param(theano.shared(numpy.ones(1,) * numpy.float32(0.5), 'forward'), 'forward',
                                  custom_update = forward,
                                  custom_update_normalized=True)
     }
Example #57
    def step(ord):
        i = ord[0]

        xx1 = T.maximum(x1[i], x1[ord[1:]])
        yy1 = T.maximum(y1[i], y1[ord[1:]])
        xx2 = T.minimum(x2[i], x2[ord[1:]])
        yy2 = T.minimum(y2[i], y2[ord[1:]])

        w = T.maximum(0.0, xx2 - xx1 + 1)
        h = T.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[ord[1:]] - inter)

        inds = T.le(ovr, thresh).nonzero()[0]
        ord = ord[inds + 1]

        return (i, ord), until(order.size > 0)