Example #1
def propagate(f, l, R, mu, eps):
    # The similarity matrix W is a linear combination of the slices in R
    W = T.tensordot(R, mu, axes=1)

    # The following indices correspond to labeled and unlabeled examples
    labeled = T.eq(l, 1).nonzero()
    unlabeled = T.eq(l, 0).nonzero()

    # Calculating the graph Laplacian of W
    D = T.diag(W.sum(axis=0))
    L = D - W

    # Computing L_UU (the Laplacian over unlabeled examples)
    L_UU = L[unlabeled][:, unlabeled][:, 0, :]

    # Computing the inverse of the (regularized) Laplacian iA = (L_UU + epsI)^-1
    epsI = eps * T.eye(L_UU.shape[0])
    rL_UU = L_UU + epsI
    iA = nlinalg.matrix_inverse(rL_UU)

    # Computing W_UL (the similarity matrix between unlabeled and labeled examples)
    W_UL = W[unlabeled][:, labeled][:, 0, :]
    f_L = f[labeled]

    # f* = (L_UU + epsI)^-1 W_UL f_L
    f_star = iA.dot(W_UL.dot(f_L))

    return f_star
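
A minimal usage sketch for propagate(): it assumes the function is defined as above in a module where theano.tensor is imported as T and theano.tensor.nlinalg as nlinalg; the tensor types, shapes, and the 1e-2 epsilon are illustrative choices, not part of the original.

import numpy as np
import theano
import theano.tensor as T
from theano.tensor import nlinalg

f = T.vector('f')    # per-example label values
l = T.ivector('l')   # 1 = labeled, 0 = unlabeled
R = T.tensor3('R')   # similarity slices, shape (n, n, n_slices)
mu = T.vector('mu')  # mixing weights, one per slice

f_star = propagate(f, l, R, mu, eps=1e-2)
propagate_fn = theano.function([f, l, R, mu], f_star, allow_input_downcast=True)

rng = np.random.RandomState(0)
R_val = rng.rand(4, 4, 2)
R_val = (R_val + R_val.transpose(1, 0, 2)) / 2.0   # make each slice symmetric
print(propagate_fn([1.0, 0.0, 0.0, 0.0], [1, 1, 0, 0], R_val, [0.5, 0.5]))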
Example #2
def ber(y, pred):
    a = (tensor.neq(y, 1) * tensor.neq(pred, 1)).sum()
    b = (tensor.neq(y, 1) * tensor.eq(pred, 1)).sum()
    c = (tensor.eq(y, 1) * tensor.neq(pred, 1)).sum()
    d = (tensor.eq(y, 1) * tensor.eq(pred, 1)).sum()
    [a, b, c, d] = [tensor.cast(x, dtype=theano.config.floatX) for x in [a, b, c, d]]
    return (b / (a + b) + c / (c + d)) / numpy.float32(2)
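
A small numeric check of ber(), assuming it is defined as above in a module where numpy, theano and theano.tensor (as tensor) are imported; the toy labels are illustrative.

import numpy
import theano
import theano.tensor as tensor

y_sym = tensor.ivector('y')
pred_sym = tensor.ivector('pred')
ber_fn = theano.function([y_sym, pred_sym], ber(y_sym, pred_sym), allow_input_downcast=True)

# one of two positives missed and one of two negatives misclassified -> (0.5 + 0.5) / 2 = 0.5
print(ber_fn([1, 1, 0, 0], [1, 0, 1, 0]))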
Example #3
	def errors(self, y, print_output=False):
		# check if y has same dimension of y_pred
		if y.ndim != self.y_pred.ndim:
			raise TypeError('y should have the same shape as self.y_pred', ('y', y.type, 'y_pred', self.y_pred.type))

		# check if y is of the correct datatype
		if y.dtype.startswith('int'):
			num_positive = T.cast(T.sum(T.eq(y,1)),'float64')
			num_predicted_positive = T.cast(T.sum(T.eq(self.y_pred,1)),'float64')
			num_correctly_predicted = T.cast(T.sum(T.eq(self.y_pred*y,1)),'float64')

			# note: T.gt() returns a symbolic tensor, so a Python `if` cannot branch on it;
			# the zero-denominator guards are therefore expressed with T.switch
			P = T.switch(T.gt(num_predicted_positive,0.0), T.cast(num_correctly_predicted / num_predicted_positive,'float64'), T.cast(0.0,'float64'))	# precision  = True positive / (True positive + False positive)

			R = T.switch(T.gt(num_positive,0.0), T.cast(num_correctly_predicted / num_positive,'float64'), T.cast(0.0,'float64'))	# recall     = True positive / (True positive + False negative)

			F1 = T.switch(T.gt(P+R,0.0), 2.0*P*R/(P+R), T.cast(0.0,'float64'))	# F1 score

			if (print_output):
				print("      num positive = {0}".format( num_positive ) )
				print("      num predicted positive = {0}".format( num_predicted_positive ) )
				print("      num correctly predicted = {0}".format( num_correctly_predicted ) )
				print("      precision = {0}".format(P))
				print("      recall = {0}".format(R))
				print("      F1 score = {0}".format(F1))
			return [T.mean(T.neq(self.y_pred, y)), P, R, F1]

		else:
			raise NotImplementedError()
		return
Example #4
	def compile(self, optimizer, loss, class_mode='categorical'):
		self.optimizer = optimizer
		self.loss = objectives.get(loss)

		self.X_train = self.get_input() # symbolic variable
		self.y_train = self.get_output() # symbolic variable

		self.y = T.zeros_like(self.y_train) # symbolic variable

		train_loss = self.loss(self.y, self.y_train)

		if class_mode == 'categorical':
			train_accuracy = T.mean(T.eq(T.argmax(self.y, axis=-1), T.argmax(self.y_train, axis=-1)))
		elif class_mode == 'binary':
			train_accuracy = T.mean(T.eq(self.y, T.round(self.y_train)))
		else:
			raise Exception("Invalid class mode: " + str(class_mode))
		self.class_mode = class_mode

		#updates = self.optimizer.get_updates(train_loss, self.params)
		self.grad = T.grad(cost=train_loss, wrt=self.params, disconnected_inputs='raise')
		updates = []
		for p, g in zip(self.params, self.grad):
			# note: instead of an optimizer step, each parameter is shifted by a single random
			# offset drawn once at compile time; the gradient g is computed but not used here
			updates.append((p, p-random.uniform(-0.3,1)))

		if type(self.X_train) == list:
			train_ins = self.X_train + [self.y]
		else:
			train_ins = [self.X_train, self.y]

		self._train = theano.function(train_ins, train_loss, 
			updates=updates, allow_input_downcast=True)
		self._train_with_acc = theano.function(train_ins, [train_loss, train_accuracy],
			updates=updates, allow_input_downcast=True)
Example #5
    def compile(self, optimizer, loss, class_mode="categorical", theano_mode=None):
        self.optimizer = optimizers.get(optimizer)

        self.loss = objectives.get(loss)
        weighted_loss = weighted_objective(objectives.get(loss))

        # input of model
        self.X_train = self.get_input(train=True)
        self.X_test = self.get_input(train=False)

        self.y_train = self.get_output(train=True)
        self.y_test = self.get_output(train=False)

        # target of model
        self.y = T.zeros_like(self.y_train)

        self.weights = T.ones_like(self.y_train)

        train_loss = weighted_loss(self.y, self.y_train, self.weights)
        test_loss = weighted_loss(self.y, self.y_test, self.weights)

        train_loss.name = 'train_loss'
        test_loss.name = 'test_loss'
        self.y.name = 'y'

        if class_mode == "categorical":
            train_accuracy = T.mean(T.eq(T.argmax(self.y, axis=-1), T.argmax(self.y_train, axis=-1)))
            test_accuracy = T.mean(T.eq(T.argmax(self.y, axis=-1), T.argmax(self.y_test, axis=-1)))

        elif class_mode == "binary":
            train_accuracy = T.mean(T.eq(self.y, T.round(self.y_train)))
            test_accuracy = T.mean(T.eq(self.y, T.round(self.y_test)))
        else:
            raise Exception("Invalid class mode:" + str(class_mode))
        self.class_mode = class_mode
        self.theano_mode = theano_mode

        for r in self.regularizers:
            train_loss = r(train_loss)
        updates = self.optimizer.get_updates(self.params, self.constraints, train_loss)

        if type(self.X_train) == list:
            train_ins = self.X_train + [self.y, self.weights]
            test_ins = self.X_test + [self.y, self.weights]
            predict_ins = self.X_test
        else:
            train_ins = [self.X_train, self.y, self.weights]
            test_ins = [self.X_test, self.y, self.weights]
            predict_ins = [self.X_test]

        self._train = theano.function(train_ins, train_loss,
            updates=updates, allow_input_downcast=True, mode=theano_mode)
        self._train_with_acc = theano.function(train_ins, [train_loss, train_accuracy],
            updates=updates, allow_input_downcast=True, mode=theano_mode)
        self._predict = theano.function(predict_ins, self.y_test,
            allow_input_downcast=True, mode=theano_mode)
        self._test = theano.function(test_ins, test_loss,
            allow_input_downcast=True, mode=theano_mode)
        self._test_with_acc = theano.function(test_ins, [test_loss, test_accuracy],
            allow_input_downcast=True, mode=theano_mode)
Example #6
File: dytree.py Project: dallascard/guac
        def pass_edges(input_idx_t, edge_t, edge_mask_t, counter_t, h_tm1, c_tm1, x):
            h_t = h_tm1
            c_t = c_tm1
            # select the input vector to use for this edge (source)
            x_t_i = x[input_idx_t, :]
            # zero out the input unless this is a leaf node
            x_t_0 = T.switch(T.eq(T.sum(edge_mask_t), 0), x_t_i, x_t_i*0)
            # concatenate with the input edge vector
            x_t_edge = T.concatenate([x_t_0, edge_t])

            # compute attention weights, using a manual softmax
            attention_scores = T.dot(self.v_a, T.tanh(T.dot(self.W_h_a, h_tm1))) # (1, n_edges)
            # find the max of the unmasked values
            max_score = T.max(attention_scores + edge_mask_t * 10000.0) - 10000.0
            # exponentiate the differences, masking first to avoid inf, and then to keep only relevant scores
            exp_scores = T.exp((attention_scores - max_score) * edge_mask_t) * edge_mask_t
            # take the sum, and add one if the mask is all zeros to avoid an inf
            exp_scores_sum = T.sum(exp_scores) + T.switch(T.eq(T.sum(edge_mask_t), 0), 1.0, 0.0)
            # normalize to compute the weights
            weighted_mask = exp_scores / exp_scores_sum

            i_t = T.nnet.sigmoid(T.dot(x_t_edge, self.W_x_i) + T.sum(T.dot(self.W_h_i.T, (weighted_mask * h_tm1)).T, axis=0) + self.b_h_i)
            f_t = T.nnet.sigmoid(T.dot(x_t_edge, self.W_x_f) + T.sum(T.dot(self.W_h_f.T, (weighted_mask * h_tm1)).T, axis=0) + self.b_h_f)
            o_t = T.nnet.sigmoid(T.dot(x_t_edge, self.W_x_o) + T.sum(T.dot(self.W_h_o.T, (weighted_mask * h_tm1)).T, axis=0) + self.b_h_o)
            u_t = T.tanh(T.dot(x_t_edge, self.W_x_u) + T.sum(T.dot(self.W_h_u.T, (weighted_mask * h_tm1)).T, axis=0) + self.b_h_u)

            c_temp = i_t * u_t + f_t * T.sum((weighted_mask * c_tm1).T, axis=0)
            h_temp = o_t * T.tanh(c_temp)

            h_t = T.set_subtensor(h_t[:, counter_t], h_temp)
            c_t = T.set_subtensor(c_t[:, counter_t], c_temp)
            return h_t, c_t
Example #7
File: optim.py Project: gburt/iaf
def AdaMaxAvg2(ws, objective, alpha=.01, beta1=.1, beta2=.001, beta3=0.01, n_accum=1):
    if n_accum == 1:
        return AdaMaxAvg(ws, objective, alpha, beta1, beta2, beta3)
    print 'AdaMax_Avg2', 'alpha:',alpha,'beta1:',beta1,'beta2:',beta2,'beta3:',beta3,'n_accum:',n_accum
    
    gs = G.ndict.T_grad(objective.sum(), ws, disconnected_inputs='raise')

    new = OrderedDict()
    
    from theano.ifelse import ifelse
    it = G.sharedf(0.)
    new[it] = it + 1
    reset = T.eq(T.mod(it,n_accum), 0)
    update = T.eq(T.mod(it,n_accum), n_accum-1)
    
    ws_avg = []
    for j in range(len(ws)):
        w_avg = {}
        for i in ws[j]:
            _w = ws[j][i]
            _g = gs[j][i]
            #_g = T.switch(T.isnan(_g),T.zeros_like(_g),_g) #remove NaN's
            mom1 = G.sharedf(_w.get_value() * 0.)
            _max = G.sharedf(_w.get_value() * 0.)
            w_avg[i] = G.sharedf(_w.get_value())
            g_sum = G.sharedf(_w.get_value() * 0.)
        
            new[g_sum] = ifelse(reset, _g, g_sum + _g)
            new[mom1] = ifelse(update, (1-beta1) * mom1 + beta1 * new[g_sum], mom1)
            new[_max] = ifelse(update, T.maximum((1-beta2)*_max, abs(new[g_sum]) + 1e-8), _max)
            new[_w] = ifelse(update, _w + alpha *  new[mom1] / new[_max], _w)
            new[w_avg[i]] = ifelse(update, beta3 * new[_w] + (1.-beta3) * w_avg[i], w_avg[i])
        ws_avg += [w_avg]   
    return new, ws_avg
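
The reset/update bookkeeping used above, shown in isolation with plain Theano shared variables (the G.sharedf helper and the gradient dictionaries are not assumed here); the scalar gradient stand-in and the n_accum value are illustrative.

import numpy as np
import theano
import theano.tensor as T
from theano.ifelse import ifelse

n_accum = 3
it = theano.shared(np.float32(0.))      # step counter
g_sum = theano.shared(np.float32(0.))   # accumulated "gradient"
g = T.fscalar('g')                      # stand-in for a fresh gradient value

reset = T.eq(T.mod(it, n_accum), 0)             # first step of an accumulation window
update = T.eq(T.mod(it, n_accum), n_accum - 1)  # last step of an accumulation window
new_g_sum = ifelse(reset, g, g_sum + g)

step = theano.function([g], [reset, update, new_g_sum],
                       updates=[(it, it + 1), (g_sum, new_g_sum)],
                       allow_input_downcast=True)

for i in range(6):
    print(step(1.0))   # g_sum restarts from 1.0 every n_accum calls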
Example #8
File: test_lazy.py Project: Ambier/Theano
def more_complex_test():
    notimpl = NotImplementedOp()
    ifelseifelseif = IfElseIfElseIf()

    x1 = T.scalar('x1')
    x2 = T.scalar('x2')
    c1 = T.scalar('c1')
    c2 = T.scalar('c2')
    t1 = ifelse(c1, x1, notimpl(x2))
    t1.name = 't1'
    t2 = t1 * 10
    t2.name = 't2'
    t3 = ifelse(c2, t2, x1 + t1)
    t3.name = 't3'
    t4 = ifelseifelseif(T.eq(x1, x2), x1, T.eq(x1, 5), x2, c2, t3, t3 + 0.5)
    t4.name = 't4'

    f = function([c1, c2, x1, x2], t4, mode=Mode(linker='vm',
                                                 optimizer='fast_run'))
    if theano.config.vm.lazy is False:
        try:
            f(1, 0, numpy.array(10, dtype=x1.dtype), 0)
            assert False
        except NotImplementedOp.E:
            pass
    else:
        print(f(1, 0, numpy.array(10, dtype=x1.dtype), 0))
        assert f(1, 0, numpy.array(10, dtype=x1.dtype), 0) == 20.5
    print('... passed')
Example #9
    def getRpRnTpTnForTrain0OrVal1(self, y, training0OrValidation1):
        # The returned list has (numberOfClasses)x4 integers: >numberOfRealPositives, numberOfRealNegatives, numberOfTruePredictedPositives, numberOfTruePredictedNegatives< for each class (incl background).
        # Order in the list is the natural order of the classes (ie class-0 RP,RN,TPP,TPN, class-1 RP,RN,TPP,TPN, class-2 RP,RN,TPP,TPN ...)
        # param y: y = T.itensor4('y'). Dimensions [batchSize, r, c, z]
        
        yPredToUse = self.y_pred_train if  training0OrValidation1 == 0 else self.y_pred_val
        checkDimsOfYpredAndYEqual(y, yPredToUse, "training" if training0OrValidation1 == 0 else "validation")
        
        returnedListWithNumberOfRpRnTpTnForEachClass = []
        
        for class_i in xrange(0, self._numberOfOutputClasses) :
            #Number of Real Positive, Real Negatives, True Predicted Positives and True Predicted Negatives are reported PER CLASS (first for WHOLE).
            tensorOneAtRealPos = T.eq(y, class_i)
            tensorOneAtRealNeg = T.neq(y, class_i)

            tensorOneAtPredictedPos = T.eq(yPredToUse, class_i)
            tensorOneAtPredictedNeg = T.neq(yPredToUse, class_i)
            tensorOneAtTruePos = T.and_(tensorOneAtRealPos,tensorOneAtPredictedPos)
            tensorOneAtTrueNeg = T.and_(tensorOneAtRealNeg,tensorOneAtPredictedNeg)
                    
            returnedListWithNumberOfRpRnTpTnForEachClass.append( T.sum(tensorOneAtRealPos) )
            returnedListWithNumberOfRpRnTpTnForEachClass.append( T.sum(tensorOneAtRealNeg) )
            returnedListWithNumberOfRpRnTpTnForEachClass.append( T.sum(tensorOneAtTruePos) )
            returnedListWithNumberOfRpRnTpTnForEachClass.append( T.sum(tensorOneAtTrueNeg) )
            
        return returnedListWithNumberOfRpRnTpTnForEachClass
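
A standalone sketch of the per-class counting above, reduced to 1-D integer label vectors so it can be evaluated directly; the class index and the toy labels are illustrative.

import theano
import theano.tensor as T

y = T.ivector('y')
y_pred = T.ivector('y_pred')
class_i = 1

real_pos = T.eq(y, class_i)
real_neg = T.neq(y, class_i)
pred_pos = T.eq(y_pred, class_i)
pred_neg = T.neq(y_pred, class_i)
counts = [T.sum(real_pos),                      # real positives
          T.sum(real_neg),                      # real negatives
          T.sum(T.and_(real_pos, pred_pos)),    # true predicted positives
          T.sum(T.and_(real_neg, pred_neg))]    # true predicted negatives

count_fn = theano.function([y, y_pred], counts, allow_input_downcast=True)
print(count_fn([1, 1, 0, 2], [1, 0, 0, 2]))   # -> [RP=2, RN=2, TPP=1, TPN=2]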
Example #10
def prepare():

    X = T.fmatrix('X')
    y = T.ivector('y')

    assert not ("regression" in args and "logistic" in args)

    if "regression" in args:
        output_layer = squared_error_net_adaptive()
    else:
        output_layer = logistic()

    all_params = lasagne.layers.get_all_params(output_layer)

    if "regression" in args:
        prob_vector = lasagne.layers.get_output(output_layer, X)
        loss = squared_error(prob_vector, y).mean()
        pred = T.maximum(0, T.minimum( T.round(prob_vector), args["num_classes"]-1 ) )
        accuracy = T.mean( T.eq( pred, y ) )
    else:
        a = args["a"]
        b = args["b"]
        loss_fn = get_hybrid_loss(a,b)
        prob_vector = lasagne.layers.get_output(output_layer, X)
        loss = loss_fn(prob_vector, y).mean()
        pred = T.argmax( prob_vector, axis=1 )
        accuracy = T.mean( T.eq(pred,y) )

    return Container(
        { "X": X, "y": y, "output_layer": output_layer, "all_params": all_params,
        "loss": loss, "pred": pred, "accuracy": accuracy,
        "prob_vector": prob_vector
        }
    )
Example #11
File: __init__.py Project: amoliu/AgentNet
    def get_action_results(self,last_states,actions,time_i):
        
        #state is a boolean vector: whether or not i-th action
        #was tried already during this session
        #last output[:,end_code] always remains 1 after first being triggered
        
        
        last_state = check_list(last_states)[0]
        action = check_list(actions)[0]
        
        batch_range = T.arange(action.shape[0])

        session_active = T.eq(last_state[:,self.end_action_id],0)
        
        state_after_action = T.set_subtensor(last_state[batch_range,action],1)
        
        new_state = T.switch(
            session_active.reshape([-1,1]),
            state_after_action,
            last_state
        )
        
        session_terminated = T.eq(new_state[:,self.end_action_id],1)
        
        observation = T.concatenate([
                self.joint_data[batch_range,action,None],#uint8[batch,1]
                session_terminated.reshape([-1,1]), #whether session has been terminated by now
                T.extra_ops.to_one_hot(action,self.joint_data.shape[1]),
            ],axis=1)
        
        return new_state, observation
Example #12
File: unet.py Project: Rhoana/icon
    def unet_crossentropy_loss_sampled(y_true, y_pred):
        epsilon = 1.0e-4
        y_pred_clipped = T.flatten(T.clip(y_pred, epsilon, 1.0-epsilon))
        y_true = T.flatten(y_true)
        # this seems to work
        # it is super ugly though and I am sure there is a better way to do it
        # but I am struggling with theano to cooperate
        # filter the right indices
        classPos = 1
        classNeg = 0
        indPos   = T.eq(y_true, classPos).nonzero()[0]
        indNeg   = T.eq(y_true, classNeg).nonzero()[0]
        #pos      = y_true[ indPos ]
        #neg      = y_true[ indNeg ]

        # shuffle
        n = indPos.shape[0]
        indPos = indPos[UNET.srng.permutation(n=n)]
        n = indNeg.shape[0]
        indNeg = indNeg[UNET.srng.permutation(n=n)]
        # take equal number of samples depending on which class has less
        n_samples = T.cast(T.min([ indPos.shape[0], indNeg.shape[0]]), dtype='int64')
        #n_samples = T.cast(T.min([T.sum(y_true), T.sum(1-y_true)]), dtype='int64')

        indPos = indPos[:n_samples]
        indNeg = indNeg[:n_samples]
        #loss_vector = -T.mean(T.log(y_pred_clipped[indPos])) - T.mean(T.log(1-y_pred_clipped[indNeg]))
        loss_vector = -T.mean(T.log(y_pred_clipped[indPos])) - T.mean(T.log(y_pred_clipped[indNeg]))
        loss_vector = T.clip(loss_vector, epsilon, 1.0-epsilon)
        average_loss = T.mean(loss_vector)
        # T.isnan() returns a symbolic tensor, so a Python `if` cannot branch on it; select with T.switch
        average_loss = T.switch(T.isnan(average_loss), T.mean(y_pred_clipped[indPos]), average_loss)
        return average_loss
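
The class-balanced index selection above, in isolation and without the random shuffle so the output is deterministic; the toy target vector is illustrative.

import theano
import theano.tensor as T

y_true = T.vector('y_true')
indPos = T.eq(y_true, 1).nonzero()[0]
indNeg = T.eq(y_true, 0).nonzero()[0]
n_samples = T.cast(T.min([indPos.shape[0], indNeg.shape[0]]), dtype='int64')

pick = theano.function([y_true], [indPos[:n_samples], indNeg[:n_samples]],
                       allow_input_downcast=True)
print(pick([1.0, 1.0, 1.0, 0.0, 0.0]))   # -> [array([0, 1]), array([3, 4])]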
Example #13
 def test_tt(self):
     sample, updates = rejection_sample([self.fair_coin,], tensor.eq(tensor.sum(tensor.eq(self.coin, self.data)), 5))
     sampler = theano.function([], sample, updates=updates)
     
     # TODO: this is super-slow, how can bher do this fast?
     for i in range(100):
         print sampler()
Example #14
    def get_monitoring_channels(self, model, data, **kwargs):

        X_pure,Y_pure = data
        X_pure.tag.test_value = numpy.random.random(size=[5,784]).astype('float32')
        Y_pure.tag.test_value = numpy.random.randint(10,size=[5,1]).astype('int64')
        rval = OrderedDict()

        g = model.compressor
        d = model.discriminator

        yhat_pure = T.argmax(d.fprop(X_pure),axis=1).dimshuffle(0,'x')
        yhat_reconstructed = T.argmax(d.fprop(g.reconstruct(X_pure)),axis=1).dimshuffle(0,'x')

        rval['conviction_pure'] = T.cast(T.eq(yhat_pure,10).mean(), 'float32')
        rval['accuracy_pure'] = T.cast(T.eq(yhat_pure,Y_pure).mean(), 'float32')
        rval['inaccuracy_pure'] = 1 - rval['conviction_pure']-rval['accuracy_pure']

        rval['conviction_fake'] = T.cast(T.eq(yhat_reconstructed,10).mean(), 'float32')
        rval['accuracy_fake'] = T.cast(T.eq(yhat_reconstructed,Y_pure).mean(), 'float32')
        rval['inaccuracy_fake'] = 1 - rval['conviction_fake']-rval['accuracy_fake']

        rval['discernment_pure'] = rval['accuracy_pure']+rval['inaccuracy_pure']
        rval['discernment_fake'] = rval['conviction_fake']
        rval['discernment'] = 0.5*(rval['discernment_pure']+rval['discernment_fake'])

        # y = T.alloc(0., m, 1)  
        d_obj, g_obj = self.get_objectives(model, data)
        rval['objective_d'] = d_obj
        rval['objective_g'] = g_obj

        #monitor probability of true
        # rval['now_train_compressor'] = self.now_train_compressor
        return rval       
Example #15
    def __call__(self, input_):
        m = input_.mean()
        v = input_.std()

        new_m = T.switch(T.eq(self.m, 0.),
                         m,
                         (np.float32(1.) - self.rate) * self.m + self.rate * m)
        new_var = T.switch(T.eq(self.var, 0.),
                           v,
                           (np.float32(1.) - self.rate) * self.var + self.rate * v)

        updates = [(self.m, new_m), (self.var, new_var)]

        input_centered = (
            (input_ - new_m) / T.maximum(1., T.sqrt(new_var)))

        input_ = T.zeros_like(input_) + input_

        outs = OrderedDict(
            x=input_,
            x_centered=input_centered,
            m=new_m,
            var=new_var
        )
        return outs, updates
Example #16
File: costs.py Project: rserizel/beta_nmf
def beta_div(X, W, H, beta):
    """Compute beta divergence D(X|WH)

    Parameters
    ----------
    X : Theano tensor
        data
    W : Theano tensor
        Bases
    H : Theano tensor
        activation matrix
    beta : Theano scalar


    Returns
    -------
    div : Theano scalar
        beta divergence D(X|WH)"""
    div = ifelse(
      T.eq(beta, 2),
      T.sum(1. / 2 * T.power(X - T.dot(H, W), 2)),
      ifelse(
        T.eq(beta, 0),
        T.sum(X / T.dot(H, W) - T.log(X / T.dot(H, W)) - 1),
        ifelse(
          T.eq(beta, 1),
          T.sum(T.mul(X, (T.log(X) - T.log(T.dot(H, W)))) + T.dot(H, W) - X),
          T.sum(1. / (beta * (beta - 1.)) * (T.power(X, beta) +
                (beta - 1.) * T.power(T.dot(H, W), beta) -
                beta * T.power(T.mul(X, T.dot(H, W)), (beta - 1)))))))
    return div
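
A sketch that evaluates beta_div() for the three special beta values, assuming the function is defined as above in a module where theano.tensor (as T) and theano.ifelse.ifelse are imported; the shapes follow the X ~ dot(H, W) convention used in the code, and the random data is illustrative.

import numpy as np
import theano
import theano.tensor as T
from theano.ifelse import ifelse

X = T.matrix('X')
W = T.matrix('W')
H = T.matrix('H')
beta = T.scalar('beta')

div_fn = theano.function([X, W, H, beta], beta_div(X, W, H, beta))

rng = np.random.RandomState(0)
Hv, Wv = rng.rand(4, 3), rng.rand(3, 5)
Xv = Hv.dot(Wv) + 0.01 * rng.rand(4, 5)
for b in (0.0, 1.0, 2.0):   # Itakura-Saito, KL and Euclidean special cases
    print(b, div_fn(Xv, Wv, Hv, b))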
Example #17
    def pp_errors(self, y, prob , ioi):
        """Return a float representing the number of errors in the minibatch
        over the total number of examples of the minibatch ; zero one
        loss over the size of the minibatch

        :type y: theano.tensor.TensorType
        :param y: corresponds to a vector that gives for each example the
                  correct label
        ioi: the index that you are interested in.
        prob: the prob, which is p_y_given_x
        """
        #prob = 0.5
        #ioi = 1
        # check if y has same dimension of y_pred
        if y.ndim != self.y_pred.ndim:
            raise TypeError('y should have the same shape as self.y_pred',
                ('y', y.type, 'y_pred', self.y_pred.type))
        # check if y is of the correct datatype
        if y.dtype.startswith('int'):
            # the T.neq operator returns a vector of 0s and 1s, where 1
            # represents a mistake in prediction
            #return T.mean(T.neq(self.y_pred, y))
            inprob=self.p_y_given_x[:,ioi]
            pt1 = T.gt(inprob, prob)
            pt2 = T.eq(self.y_pred,ioi)
            pt3 = T.eq(y,ioi)
            ppn = T.sum(pt1 & pt2 & pt3)
            predn = T.sum(pt1 & pt2)
            #return (predn,ppn)
            #return T.sum(T.eq(self.y_pred, y))
            return (ppn,predn)
        else:
            raise NotImplementedError()
Example #18
def accuracy_metric(y_pred, y_true, void_labels, one_hot=False):

    assert (y_pred.ndim == 2) or (y_pred.ndim == 1)

    # y_pred to indices
    if y_pred.ndim == 2:
        y_pred = T.argmax(y_pred, axis=1)

    if one_hot:
        y_true = T.argmax(y_true, axis=1)

    # Compute accuracy
    acc = T.eq(y_pred, y_true).astype(_FLOATX)

    # Create mask
    mask = T.ones_like(y_true, dtype=_FLOATX)
    for el in void_labels:
        indices = T.eq(y_true, el).nonzero()
        # `indices` is symbolic, so Python's any() cannot test it; apply the update unconditionally
        # (set_subtensor with an empty index set is a no-op)
        mask = T.set_subtensor(mask[indices], 0.)

    # Apply mask
    acc *= mask
    acc = T.sum(acc) / T.sum(mask)

    return acc
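
The void-label masking above in isolation, with the list of void labels collapsed to a single illustrative label (255) and _FLOATX replaced by an explicit 'float32'.

import theano
import theano.tensor as T

y_pred = T.ivector('y_pred')
y_true = T.ivector('y_true')
void_label = 255   # illustrative

hits = T.eq(y_pred, y_true).astype('float32')
mask = T.neq(y_true, void_label).astype('float32')
masked_acc = T.sum(hits * mask) / T.sum(mask)

acc_fn = theano.function([y_pred, y_true], masked_acc, allow_input_downcast=True)
print(acc_fn([0, 1, 2, 2], [0, 1, 1, 255]))   # void entry ignored -> 2/3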
Example #19
def relevance_conv_a_b_abs(inputs, weights, out_relevances, a, b, bias=None):
    assert a is not None
    assert b is not None
    assert a - b == 1
    weights_plus = weights * T.gt(weights, 0)
    weights_neg = weights * T.lt(weights, 0)

    plus_norm = conv2d(T.abs_(inputs), weights_plus)
    # stabilize, prevent division by 0
    eps = 1e-4
    plus_norm += T.eq(plus_norm, 0) * eps
    plus_rel_normed = out_relevances / plus_norm
    in_rel_plus = conv2d(plus_rel_normed, weights_plus.dimshuffle(1, 0, 2, 3)[:, :, ::-1, ::-1], border_mode="full")
    in_rel_plus *= T.abs_(inputs)

    # minuses to get positive outputs, since will be subtracted
    # at end of function
    neg_norm = -conv2d(T.abs_(inputs), weights_neg)
    neg_norm += T.eq(neg_norm, 0) * eps
    neg_rel_normed = out_relevances / neg_norm
    in_rel_neg = -conv2d(neg_rel_normed, weights_neg.dimshuffle(1, 0, 2, 3)[:, :, ::-1, ::-1], border_mode="full")
    in_rel_neg *= T.abs_(inputs)

    in_relevance = a * in_rel_plus - b * in_rel_neg
    return in_relevance
Example #20
    def NLL(self, y, class_weights=None, example_weights=None, label_prop_thresh=None):
        """
        Returns the symbolic mean and instance-wise negative log-likelihood of the prediction
        of this model under a given target distribution.

        y: theano.tensor.TensorType
          corresponds to a vector that gives for each example the correct label. Labels < 0 are ignored (e.g. can
          be used for label propagation)

        class_weights: theano.tensor.TensorType
          weight vector of float32 of length  ``n_lab``. Values: ``1.0`` (default), ``w < 1.0`` (less important),
          ``w > 1.0`` (more important class)

        label_prop_thresh: float (0.5,1)
          This threshold allows unsupervised label propagation (only for examples with negative/ignore labels).
          If the predictive probability of the most likely class exceeds the threshold, this class is assumed to
          be the correct label and the training is pushed in this direction.
          Should only be used with pre-trained networks, and values <= 0.5 are disabled.
        """

        # NOTE: This whole function has a ugly problem with NaN. They arise for pred values close to 0 or 1
        # (i.e. for NNs that make very confident and usually also correct predictions) because initially the log of
        # all the whole pred tensor is taken. Later we want to use only some indices of the tensor (mask) but
        # that is not so easy done. There are two ways:
        # 1. advanced indexing: T.log(pred)[mask.nonzero()] --> fails if mask is all zero, cannot be fixed
        # 2. multiplying with 0-1-mask: T.log(pred) * mask.nonzero --> but NaN * 0 = NaN, but we require 0!
        # For the second option, in principle, the NaNs could be replaced by 0 using T.switch, but then the gradient
        # fails because the replaced value is disconnected from the parameters and gives NaN (mathematically
        # the gradient should correctly be 0 then; there is a Theano ticket open to request a fix).
        # So finally the best practice is to add a stabilisation to the log: T.log(pred) --> T.log(pred+eps)
        # This looks ugly, but does the task and the introduced error is completely negligible
        eps = 1e-6
        pred = self.class_probabilities  # predictive (bs, cl)
        y = y.dimshuffle(0, 'x')  # the labels (bs, 1)
        cls = T.arange(self.class_probabilities.shape[1]).dimshuffle('x', 0)  # available classes
        label_selection = T.eq(cls, y)  # selects correct labels

        if class_weights is None:
            class_weights = T.ones_like(pred)
        else:
            class_weights = class_weights.dimshuffle('x', 0)

            # Up vote block
        nll_inst_up = -T.log(pred + eps) * label_selection * class_weights
        N_up = T.sum(label_selection)  # number of labelled examples

        if label_prop_thresh is not None:  # Label propagation block
            above_thresh = pred > label_prop_thresh  # this is one for the class with highest prob
            prop_mask = above_thresh * (1 - label_selection.sum(axis=1))  # don't do where training labels are available
            nll_inst_up_prop = -T.log(pred + eps) * prop_mask * class_weights  # eps-stabilised log, as described in the note above
            N_up_prop = prop_mask.sum()

            nll_inst_up += nll_inst_up_prop
            N_up += N_up_prop

        nll_inst = nll_inst_up
        N_up = T.switch(T.eq(N_up, 0), 1, N_up)  # patch N to be not 0, when this is the case the sum is 0 anyway!
        nll = nll_inst.sum() / N_up

        return nll, nll_inst
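
The label-selection trick used above (T.eq between a class-range row and a label column), shown on its own: broadcasting produces a one-hot mask of shape (batch, n_classes). The class count and labels are illustrative.

import theano
import theano.tensor as T

y = T.ivector('y')
n_classes = 4
cls = T.arange(n_classes).dimshuffle('x', 0)        # shape (1, n_classes)
label_selection = T.eq(cls, y.dimshuffle(0, 'x'))   # shape (batch, n_classes)

onehot = theano.function([y], label_selection, allow_input_downcast=True)
print(onehot([2, 0, 3]))
# [[0 0 1 0]
#  [1 0 0 0]
#  [0 0 0 1]]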
Example #21
def jaccard_metric(y_pred, y_true, n_classes, one_hot=False):

    assert (y_pred.ndim == 2) or (y_pred.ndim == 1)

    # y_pred to indices
    if y_pred.ndim == 2:
        y_pred = T.argmax(y_pred, axis=1)

    if one_hot:
        y_true = T.argmax(y_true, axis=1)

    # Compute confusion matrix
    # cm = T.nnet.confusion_matrix(y_pred, y_true)
    cm = T.zeros((n_classes, n_classes))
    for i in range(n_classes):
        for j in range(n_classes):
            cm = T.set_subtensor(
                cm[i, j], T.sum(T.eq(y_pred, i) * T.eq(y_true, j)))

    # Compute Jaccard Index
    TP_perclass = T.cast(cm.diagonal(), _FLOATX)
    FP_perclass = cm.sum(1) - TP_perclass
    FN_perclass = cm.sum(0) - TP_perclass

    num = TP_perclass
    denom = TP_perclass + FP_perclass + FN_perclass

    return T.stack([num, denom], axis=0)
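
The T.eq-based confusion matrix from above, evaluated on toy data with three classes so the row/column convention is visible.

import theano
import theano.tensor as T

n_classes = 3
y_pred = T.ivector('y_pred')
y_true = T.ivector('y_true')

cm = T.zeros((n_classes, n_classes))
for i in range(n_classes):
    for j in range(n_classes):
        cm = T.set_subtensor(cm[i, j], T.sum(T.eq(y_pred, i) * T.eq(y_true, j)))

cm_fn = theano.function([y_pred, y_true], cm, allow_input_downcast=True)
print(cm_fn([0, 0, 1, 2], [0, 1, 1, 2]))   # rows: predicted class, columns: true class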
Example #22
File: model.py Project: ivanhe/rnn
 def build_model(self):
   print '\n... building the model with unroll=%d, backroll=%d' \
     % (self.source.unroll, self.source.backroll)
   x = T.imatrix('x')
   y = T.imatrix('y')
   reset = T.scalar('reset')
   hiddens = [h['init'] for h in self.hiddens.values()]
   outputs_info = [None] * 3 + hiddens
   [losses, probs, errors, hids], updates = \
     theano.scan(self.step, sequences=[x, y], outputs_info=outputs_info)
   loss = losses.sum()
   error = errors.sum() / T.cast((T.neq(y, 255).sum()), floatX)
   hidden_updates_train = []
   hidden_updates_test = []
   for h in self.hiddens.values():
     h_train = ifelse(T.eq(reset, 0), \
       hids[-1-self.source.backroll, :], T.ones_like(h['init']))
     h_test = ifelse(T.eq(reset, 0), \
       hids[-1, :], T.ones_like(h['init']))
     hidden_updates_train.append((h['init'], h_train))
     hidden_updates_test.append((h['init'], h_test))
   updates = self.source.get_updates(loss, self.sgd_params)
   updates += hidden_updates_train
   rets = [loss, probs[-1, :], error]
   mode = theano.Mode(linker='cvm')
   train_model = theano.function([x, y, reset, self.lr], rets, \
     updates=updates, mode=mode)
   test_model = theano.function([x, y, reset], rets, \
     updates=hidden_updates_test, mode=mode)
   return train_model, test_model
Example #23
def functions(network):
    # Symbolic variables
    X = T.tensor4()
    Y = T.ivector()

    # Non-deterministic training
    parameters = nn.layers.get_all_params(layer=network, trainable=True)   
    output = nn.layers.get_output(layer_or_layers=network, inputs=X,
        deterministic=False)
    prediction = output.argmax(-1)
    loss = T.mean(nn.objectives.categorical_crossentropy(
        predictions=output, targets=Y))
    accuracy = T.mean(T.eq(prediction, Y))
    gradient = T.grad(cost=loss, wrt=parameters)
    update = nn.updates.nesterov_momentum(loss_or_grads=gradient, 
        params=parameters, learning_rate=0.001, momentum=0.9)
    training_function = theano.function(
        inputs=[X, Y], outputs=[loss, accuracy], updates=update)

    # Non-deterministic testing
    test_function = theano.function(
        inputs=[X], outputs=prediction)

    # Deterministic validation
    det_output = nn.layers.get_output(layer_or_layers=network, inputs=X,
        deterministic=True)
    det_prediction = det_output.argmax(-1)
    det_loss = T.mean(nn.objectives.categorical_crossentropy(
        predictions=det_output, targets=Y))
    det_accuracy = T.mean(T.eq(det_prediction, Y))  
    validation_function = theano.function(
        inputs=[X, Y], outputs=[det_loss, det_accuracy])

    return training_function, validation_function, test_function
Example #24
    def multiclassRealPosAndNegAndTruePredPosNegTraining0OrValidation1(self, y, training0OrValidation1):
	"""
	The returned list has (numberOfClasses)x4 integers: >numberOfRealPositives, numberOfRealNegatives, numberOfTruePredictedPositives, numberOfTruePredictedNegatives< for each class (incl background).
	Order in the list is the natural order of the classes (ie class-0 RP,RN,TPP,TPN, class-1 RP,RN,TPP,TPN, class-2 RP,RN,TPP,TPN ...)
	"""
	returnedListWithNumberOfRpRnPpPnForEachClass = []

	for class_i in xrange(0, self.numberOfOutputClasses) :
		#Number of Real Positive, Real Negatives, True Predicted Positives and True Predicted Negatives are reported PER CLASS (first for WHOLE).
		vectorOneAtRealPositives = T.eq(y, class_i)
		vectorOneAtRealNegatives = T.neq(y, class_i)

		if training0OrValidation1 == 0 : #training:
			yPredToUse = self.y_pred
		else: #validation
			yPredToUse = self.y_pred_inference

		vectorOneAtPredictedPositives = T.eq(yPredToUse, class_i)
		vectorOneAtPredictedNegatives = T.neq(yPredToUse, class_i)
		vectorOneAtTruePredictedPositives = T.and_(vectorOneAtRealPositives,vectorOneAtPredictedPositives)
		vectorOneAtTruePredictedNegatives = T.and_(vectorOneAtRealNegatives,vectorOneAtPredictedNegatives)
		    
		returnedListWithNumberOfRpRnPpPnForEachClass.append( T.sum(vectorOneAtRealPositives) )
		returnedListWithNumberOfRpRnPpPnForEachClass.append( T.sum(vectorOneAtRealNegatives) )
		returnedListWithNumberOfRpRnPpPnForEachClass.append( T.sum(vectorOneAtTruePredictedPositives) )
		returnedListWithNumberOfRpRnPpPnForEachClass.append( T.sum(vectorOneAtTruePredictedNegatives) )

	return returnedListWithNumberOfRpRnPpPnForEachClass
Example #25
        def each_loss(outpt, inpt):
            # y 是填充了blank之后的ans
            blank = 26
            y_nblank = T.neq(inpt, blank)
            n = T.dot(y_nblank, y_nblank)  # actual (unpadded) label length
            N = 2 * n + 1  # padded label length, without the extra trailing padding
            labels = inpt[:N]
            labels2 = T.concatenate((labels, [blank, blank]))
            sec_diag = T.neq(labels2[:-2], labels2[2:]) * T.eq(labels2[1:-1], blank)
            recurrence_relation = \
                T.eye(N) + \
                T.eye(N, k=1) + \
                T.eye(N, k=2) * sec_diag.dimshuffle((0, 'x'))

            pred_y = outpt[:, labels]

            fwd_pbblts, _ = theano.scan(
                lambda curr, accum: T.switch(T.eq(curr*T.dot(accum, recurrence_relation), 0.0),
                                             T.dot(accum, recurrence_relation)
                                             , curr*T.dot(accum, recurrence_relation)),
                sequences=[pred_y],
                outputs_info=[T.eye(N)[0]]
            )
            #return fwd_pbblts
            #liklihood = fwd_pbblts[0, 0]
            liklihood = fwd_pbblts[-1, -1] + fwd_pbblts[-1, -2]
            #liklihood = T.switch(T.lt(liklihood, 1e-35), 1e-35, liklihood)
            #loss = -T.log(T.cast(liklihood, "float32"))
            #loss = 10 * (liklihood - 1) * (liklihood - 100)
            loss = (T.le(liklihood, 1.0)*(10*(liklihood-1)*(liklihood-100)))+(T.gt(liklihood, 1.0)*(-T.log(T.cast(liklihood, "float32"))))
            return loss
Example #26
File: ctc.py Project: choko/ctc
def compute_cost_log_in_parallel(original_rnn_outputs, labels, func, x_ends, y_ends):
	mask = T.log(1 - T.or_(T.eq(labels, T.zeros_like(labels)), T.eq(labels, shift_matrix(labels, 2))))

	initial_state = T.log(T.zeros_like(labels))
	initial_state = T.set_subtensor(initial_state[:,0], 0)

	def select_probabilities(rnn_outputs, label):
		return rnn_outputs[:,label]	

	rnn_outputs, _ = theano.map(select_probabilities, [original_rnn_outputs, labels])
	rnn_outputs = T.log(rnn_outputs.dimshuffle((1,0,2)))

	def forward_step(probabilities, last_probabilities):
		all_forward_probabilities = T.stack(
			last_probabilities + probabilities,
			log_shift_matrix(last_probabilities, 1) + probabilities,
			log_shift_matrix(last_probabilities, 2) + probabilities + mask,
		)

		result = func(all_forward_probabilities, 0)
		return result

	forward_probabilities, _ = theano.scan(fn = forward_step, sequences = rnn_outputs, outputs_info = initial_state)
	forward_probabilities = forward_probabilities.dimshuffle((1,0,2))

	def compute_cost(forward_probabilities, x_end, y_end):
		return -func(forward_probabilities[x_end-1,y_end-2:y_end])

	return theano.map(compute_cost, [forward_probabilities, x_ends, y_ends])[0]
Example #27
def chi2_test_statistic(M, Obs, K, num_M, num_Obs):
    #Getting frequencies from observations
    Ns = T.dot(Obs,T.ones((K,1)))
    p = Obs/Ns
        
    #Find the zeros so we can deal with them later
    pZEROs = T.eq(p, 0)
    mZEROs = T.eq(M, 0)
    
    #log probabilities, with -INF as log(0)
    lnM = T.log(M + mZEROs) - INF*mZEROs
    lnp = T.log(p + pZEROs) - INF*pZEROs


    #Using kroneker products so every row of M hits every row of P in the difference klnM - kln
    O_ones = T.ones((num_Obs,1))
    M_ones = T.ones((num_M,1))
    klnM = kron(lnM,O_ones)
    klnP = kron(M_ones, lnp)
    klnP_M = klnP - klnM
    kObs = kron(M_ones, Obs)
    
    G = 2.0*T.dot(klnP_M ,kObs.T)
    
    G = G*T.identity_like(G)
    G = T.dot(G,T.ones((num_M*num_Obs,1)))   
    G = T.reshape(G,(num_M,num_Obs))
    
    #The following quotient improves the convergence to chi^2 by an order of magnitude
    #source: http://en.wikipedia.org/wiki/Multinomial_test
    
    #numerator = T.dot(- 1.0/(M + 0.01),T.ones((K,1))) - T.ones((num_M,1))    
    #q1 = T.ones((num_M,num_Obs)) + T.dot(numerator,1.0/Ns.T/6.0)/(K-1.0)
        
    return G#/q1 
Example #28
def custom_svrg1(loss, params, m=100, learning_rate=0.01):
    
    grads = theano.grad(loss, params)

    updates = OrderedDict()
    
    it_num = theano.shared(np.cast['int16'](0.))
    it = it_num + 1

    for param, grad in zip(params, grads):
        value = param.get_value(borrow=True)

        mu = theano.shared(np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable)

        grad_w_tilde = theano.shared(np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable)
        new_grad_w_tilde = theano.ifelse.ifelse(T.eq(it, m), grad, grad_w_tilde)

        mu_acc = theano.shared(np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable)

        updates[param] = param - learning_rate * (grad - grad_w_tilde + mu)
        updates[grad_w_tilde] = new_grad_w_tilde

        updates[mu] = theano.ifelse.ifelse(T.eq(T.mod(it, m), 0), mu_acc, mu)
        updates[mu_acc] = theano.ifelse.ifelse(T.eq(T.mod(it, m), 0), 0*mu_acc, mu_acc + grad)

    updates[it_num] = theano.ifelse.ifelse(T.eq(it, m), np.cast['int16'](1), np.cast['int16'](m))

    return updates
Example #29
def form_dataset(doc, n_in):
    """
    Given a document and the number of input units, return the vector form  of the document segmented into units of
    length (n_in + 1)
    :param doc: String : Location of doc.
    :param n_in: Number of input units of the TreeLSTM
    :return: return the vector form of the document segmented into units of length(n_in + 1)
    """
    print 'Calling form_dataset()..'
    doc_obj = open(doc)
    data = tokenize(doc_obj.read().lower())
    data = data[:int(len(data)/(n_in+1)) * (n_in+1)]
    n_sen = len(data)/(n_in+1)
    data_x, data_y = np.asarray(data).reshape((n_sen, (n_in+1)))[:, :n_in], \
                     np.asarray(data).reshape((n_sen, (n_in+1)))[:, -1]
    data_x_vec = np.asarray([sentence_vec(data_x[i], word_vecs) for i in range(len(data_x))], dtype=theano.config.floatX)
    shared_x = theano.shared(np.concatenate(data_x_vec, axis=1), name='vec_data_x', borrow=True)
    shared_x_ = assert_op(shared_x, T.eq(shared_x.get_value().shape[0], vec_dims),
                          T.eq(shared_x.get_value().shape[1], n_sen*n_in))
    shared_y = theano.shared(np.asarray(sentence_vec(data_y, word_vecs),
                               dtype=theano.config.floatX), name='vec_data_y', borrow=True)
    shared_y_ = assert_op(shared_y, T.eq(shared_y.get_value().shape[0], vec_dims),
                          T.eq(shared_y.get_value().shape[1], n_sen))
    doc_obj.close()
    # Shape(vec_data_y) reshaped from Number of sentences * Vector Dimensions * 1 to Number of sentences * Vector Dims
    return shared_x_, shared_y_
Example #30
def build_model(shared_params, options, other_params):
    """
    Build the complete neural network model and return the symbolic variables
    """
    # symbolic variables
    x = tensor.matrix(name="x", dtype=floatX)
    y1 = tensor.iscalar(name="y1")
    y2 = tensor.iscalar(name="y2")

    # lstm cell
    (ht, ct) = lstm_cell(x, shared_params, options, other_params)  # gets the ht, ct
    # softmax 1 i.e. frame type prediction
    activation = tensor.dot(shared_params['softmax1_W'], ht).transpose() + shared_params['softmax1_b']
    frame_pred = tensor.nnet.softmax(activation) # .transpose()

    # softmax 2 i.e. gesture class prediction
    #

    # predicted probability for frame type
    f_pred_prob = theano.function([x], frame_pred, name="f_pred_prob")
    # predicted frame type
    f_pred = theano.function([x], frame_pred.argmax(), name="f_pred")

    # cost
    cost = ifelse(tensor.eq(y1, 1), -tensor.log(frame_pred[0, 0] + options['log_offset'])
                  * other_params['begin_cost_factor'],
                  ifelse(tensor.eq(y1, 2), -tensor.log(frame_pred[0, 1] + options['log_offset'])
                         * other_params['end_cost_factor'],
                         ifelse(tensor.eq(y1, 3), -tensor.log(frame_pred[0, 2] + options['log_offset']),
                                tensor.abs_(tensor.log(y1)))), name='ifelse_cost')

    # function for output of the currect lstm cell and softmax prediction
    f_model_cell_output = theano.function([x], (ht, ct, frame_pred), name="f_model_cell_output")
    # return the model symbolic variables and theano functions
    return x, y1, y2, f_pred_prob, f_pred, cost, f_model_cell_output
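
The nested-ifelse cost selection above, reduced to a scalar label and a probability vector so it can be evaluated directly; the log offset and cost factors are dropped for brevity.

import theano
import theano.tensor as tensor
from theano.ifelse import ifelse

y1 = tensor.iscalar('y1')
p = tensor.vector('p')   # stand-in for frame_pred[0, :]

cost = ifelse(tensor.eq(y1, 1), -tensor.log(p[0]),
              ifelse(tensor.eq(y1, 2), -tensor.log(p[1]),
                     ifelse(tensor.eq(y1, 3), -tensor.log(p[2]),
                            tensor.abs_(tensor.log(y1)))))

cost_fn = theano.function([y1, p], cost, allow_input_downcast=True)
print(cost_fn(2, [0.2, 0.5, 0.3]))   # -> -log(0.5)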
Example #31
def normalize(x, axis=-1):
    """return X divided by norm(x)
    If an element has zero norm, normalized element will still be zeros"""
    norms = norm(x, axis=axis, keepdims=True)
    return T.switch(T.eq(norms, 0), 0, x / norms)
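
A usage sketch for normalize(). The norm() helper it calls is not part of the excerpt, so a Euclidean stand-in is assumed here; both functions are taken to live in the same module.

import theano
import theano.tensor as T

def norm(x, axis=-1, keepdims=True):
    # Euclidean norm along `axis` (assumed; the original helper is not shown)
    return T.sqrt(T.sum(T.sqr(x), axis=axis, keepdims=keepdims))

x = T.matrix('x')
normalize_fn = theano.function([x], normalize(x), allow_input_downcast=True)
print(normalize_fn([[3.0, 4.0], [0.0, 0.0]]))   # the zero-norm row stays all-zero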
Example #32
File: objectives.py Project: guozanhua/xnn
 def __call__(self, x, t):
     t_ = T.switch(T.eq(t, 0), -1, 1)
     scores = 1 - (t_ * x)
     return T.maximum(0, scores - self.threshold)
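
The same margin computation as a standalone expression, with self.threshold replaced by an illustrative constant.

import theano
import theano.tensor as T

x = T.vector('x')    # raw scores
t = T.ivector('t')   # targets in {0, 1}
threshold = 0.0      # illustrative; the class reads this from self.threshold

t_ = T.switch(T.eq(t, 0), -1, 1)   # map {0, 1} -> {-1, +1}
scores = 1 - (t_ * x)
hinge = T.maximum(0, scores - threshold)

hinge_fn = theano.function([x, t], hinge, allow_input_downcast=True)
print(hinge_fn([2.0, -0.5, 0.3], [1, 0, 1]))   # -> [0.  0.5 0.7]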
Example #33
File: sgd.py Project: rmunoz12/sfddd
    def train(self, Xs, Ys, Xv, Yv, mdl,
              data_folder='data/', out_folder='out/'):

        data_folder = os.path.join(data_folder, 'imgs/', 'train/')
        input_var = mdl.input_var
        net = mdl.get_output_layer()
        target_var = T.ivector('targets')

        prediction = lasagne.layers.get_output(net)
        loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
        loss = loss.mean()

        params = lasagne.layers.get_all_params(net, trainable=True)

        grads = T.grad(loss, params)

        test_prediction = lasagne.layers.get_output(net, deterministic=True)
        test_loss = lasagne.objectives. \
            categorical_crossentropy(test_prediction, target_var)
        test_loss = test_loss.mean()
        test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                          dtype=theano.config.floatX)

        logger.info("Compiling network functions...")
        grads_fn = theano.function([input_var, target_var], grads)
        train_fn = theano.function([input_var, target_var], loss)
        val_fn = theano.function([input_var, target_var], [test_loss, test_acc])
        predict_proba = theano.function([input_var], test_prediction)

        logger.info("Training...")
        logger.info('GPU Free Mem: %.3f' % gpu_free_mem('gb'))

        # TODO change to steps
        epochs = self.max_iter // len(Xs)  # integer division so range() below gets an int

        best_val_loss, best_epoch = None, None
        best_mdl_path = os.path.join(out_folder, 'best_model.npz')
        if not os.path.exists(out_folder):
            os.makedirs(out_folder)

        steps = 0
        for epoch in range(epochs):
            start_time = time.time()
            train_err, train_batches = 0, 0
            data_s = FileSystemData(Xs, Ys, data_folder, self.batch_size,
                                    infinite=False, augment=True, shuffle=True)
            step_err, step_g = 0, None

            for batch in tqdm(data_s, total=data_s.steps, leave=False):
                inputs, targets = batch
                inputs = floatX(np.array([mdl.preprocess(x) for x in inputs]))

                batch_err = train_fn(inputs, targets)
                batch_g = grads_fn(inputs, targets)

                if step_g is None:
                    step_g = batch_g
                else:
                    step_g = [s_g + b_g for s_g, b_g in zip(step_g, batch_g)]
                train_err += batch_err
                step_err += batch_err
                train_batches += 1
                if train_batches % self.iter_size == 0:
                    step_g = [g / np.array(self.iter_size) for g in step_g]

                    if steps == 0:
                        t_prev, m_prev, u_prev = \
                            init_adam(batch_g, params)
                    updates = step_adam(step_g, params, t_prev, m_prev, u_prev,
                                        learning_rate=self.base_lr)
                    for p, new_val in updates.items():
                        p.set_value(new_val)
                    steps += 1
                    step_err, step_g = 0, None

            data_v = FileSystemData(Xv, Yv, data_folder, self.batch_size,
                                    infinite=False, augment=False, shuffle=False)
            val_err, val_acc, val_batches = 0, 0, 0
            for batch in tqdm(data_v, total=data_v.steps, leave=False):
                inputs, targets = batch
                inputs = floatX(np.array([mdl.preprocess(x) for x in inputs]))
                err, acc = val_fn(inputs, targets)
                val_err += err
                val_acc += acc
                val_batches += 1

            train_loss = train_err / train_batches
            val_loss = val_err / val_batches
            val_acc = val_acc / val_batches * 100
            end_time = time.time() - start_time

            if not best_val_loss or val_loss < best_val_loss:
                best_val_loss = val_loss
                best_epoch = epoch
                np.savez(best_mdl_path,
                         *lasagne.layers.get_all_param_values(net))
            snapshot_path = os.path.join(out_folder, 'snapshot_epoch_%d.npz'
                                         % epoch)
            np.savez(snapshot_path, *lasagne.layers.get_all_param_values(net))

            logger.info("epoch[%d] -- Ls: %.3f | Lv: %.3f | ACCv: %.3f | Ts: %.3f"
                        % (epoch, train_loss, val_loss, val_acc, end_time))

        logger.info("loading best model: epoch[%d]" % best_epoch)
        with np.load(best_mdl_path) as f:
            param_values = [f['arr_%d' % i] for i in range(len(f.files))]
        lasagne.layers.set_all_param_values(net, param_values)

        return predict_proba
Example #34
prediction = lasagne.layers.get_output(network)
loss = lasagne.objectives.squared_error(prediction, target_var)
loss = loss.mean()

params = lasagne.layers.get_all_params(network, trainable=True)
updates = lasagne.updates.nesterov_momentum(loss,
                                            params,
                                            learning_rate=0.01,
                                            momentum=0.9)

test_prediction = lasagne.layers.get_output(network, deterministic=True)
test_loss = lasagne.objectives.squared_error(test_prediction, target_var)
#test_loss = lasagne.objectives.categorical_crossentropy(test_prediction,
#                                                        target_var)
test_loss = test_loss.mean()
test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                  dtype=theano.config.floatX)

train_fn = theano.function([input_var, target_var], loss, updates=updates)

val_fn = theano.function([input_var, target_var], [test_loss, test_acc])

num = 20
for epoch in range(num):
    train_err = train_fn(X_train, y_train)
    print(" iter: " + str(epoch) + "    training loss: " + str(train_err))

print(" ")
print(" ")
print(" ")
e = val_fn([[10, 10]], [[40]])
Example #35
    def compile(self,
                optimizer,
                loss,
                class_mode="categorical",
                theano_mode=None):
        self.optimizer = optimizers.get(optimizer)

        self.loss = objectives.get(loss)
        weighted_loss = weighted_objective(objectives.get(loss))

        # input of model
        self.X_train = self.get_input(train=True)
        self.X_test = self.get_input(train=False)

        self.y_train = self.get_output(train=True)
        self.y_test = self.get_output(train=False)

        # target of model
        self.y = T.zeros_like(self.y_train)

        self.weights = T.ones_like(self.y_train)

        if hasattr(self.layers[-1], "get_output_mask"):
            mask = self.layers[-1].get_output_mask()
        else:
            mask = None
        train_loss = weighted_loss(self.y, self.y_train, self.weights, mask)
        test_loss = weighted_loss(self.y, self.y_test, self.weights, mask)

        train_loss.name = 'train_loss'
        test_loss.name = 'test_loss'
        self.y.name = 'y'

        if class_mode == "categorical":
            train_accuracy = T.mean(
                T.eq(T.argmax(self.y, axis=-1), T.argmax(self.y_train,
                                                         axis=-1)))
            test_accuracy = T.mean(
                T.eq(T.argmax(self.y, axis=-1), T.argmax(self.y_test,
                                                         axis=-1)))

        elif class_mode == "binary":
            train_accuracy = T.mean(T.eq(self.y, T.round(self.y_train)),
                                    dtype='float32')
            test_accuracy = T.mean(T.eq(self.y, T.round(self.y_test)),
                                   dtype='float32')
        else:
            raise Exception("Invalid class mode:" + str(class_mode))
        self.class_mode = class_mode
        self.theano_mode = theano_mode

        for r in self.regularizers:
            train_loss = r(train_loss)
        updates = self.optimizer.get_updates(self.params, self.constraints,
                                             train_loss)
        updates += self.updates

        if type(self.X_train) == list:
            train_ins = self.X_train + [self.y, self.weights]
            test_ins = self.X_test + [self.y, self.weights]
            predict_ins = self.X_test
        else:
            train_ins = [self.X_train, self.y, self.weights]
            test_ins = [self.X_test, self.y, self.weights]
            predict_ins = [self.X_test]

        self._train = theano.function(train_ins,
                                      train_loss,
                                      updates=updates,
                                      allow_input_downcast=True,
                                      mode=theano_mode)
        self._train_with_acc = theano.function(train_ins,
                                               [train_loss, train_accuracy],
                                               updates=updates,
                                               allow_input_downcast=True,
                                               mode=theano_mode)
        self._predict = theano.function(predict_ins,
                                        self.y_test,
                                        allow_input_downcast=True,
                                        mode=theano_mode)
        self._test = theano.function(test_ins,
                                     test_loss,
                                     allow_input_downcast=True,
                                     mode=theano_mode)
        self._test_with_acc = theano.function(test_ins,
                                              [test_loss, test_accuracy],
                                              allow_input_downcast=True,
                                              mode=theano_mode)
Example #36
    def __init__(self, 
        glimpse_shape, glimpse_times, 
        dim_hidden, dim_fc, dim_out, 
        reward_base, 
        rng_std=1.0, activation=T.tanh, bptt_truncate=-1, 
        lmbd=0.1, # gdupdate + lmbd*rlupdate
        DEBUG=False,
        ): 
#       super(AttentionUnit, self).__init__()

        if reward_base == None: 
            reward_base = np.zeros((glimpse_times)).astype('float32')
            reward_base[-1] = 1.0
        x = T.ftensor3('x')  # N * W * H 
        y = T.ivector('y')  # label 
        lr = T.fscalar('lr')
        reward_base = theano.shared(name='reward_base', value=np.array(reward_base).astype(theano.config.floatX), borrow=True) # Time (vector)
        reward_bias = T.fvector('reward_bias')
#       rng = T.shared_randomstreams.RandomStreams(123)
        rng = MRG_RandomStreams(np.random.randint(9999999))
    
        i = InputLayer(x)
        au = AttentionUnit(x, glimpse_shape, glimpse_times, dim_hidden, rng, rng_std, activation, bptt_truncate)
#       All hidden states are put into decoder
#       layers = [i, au, InputLayer(au.output[:,:,:].flatten(2))]
#       dim_fc = [glimpse_times*dim_hidden] + dim_fc + [dim_out]
#       Only the last hidden states
        layers = [i, au, InputLayer(au.output[:,-1,:])]
        dim_fc = [dim_hidden] + dim_fc + [dim_out]
        for Idim, Odim in zip(dim_fc[:-1], dim_fc[1:]):
            fc = FullConnectLayer(layers[-1].output, Idim, Odim, activation, 'FC')
            layers.append(fc)
        sm = SoftmaxLayer(layers[-1].output)
        layers.append(sm)

        output = sm.output       # N * classes 
        hidoutput = au.output    # N * dim_output 
        location = au.location   # N * T * dim_hidden
        prediction = output.argmax(1) # N

        # calc
        equalvec = T.eq(prediction, y) # [0, 1, 0, 0, 1 ...]
        correct = T.cast(T.sum(equalvec), 'float32')
#       noequalvec = T.neq(prediction, y)
#       nocorrect = T.cast(T.sum(noequalvec), 'float32')
        logLoss = T.log(output)[T.arange(y.shape[0]), y] # 
#       reward_biased = T.outer(equalvec, reward_base - reward_bias.dimshuffle('x', 0))
        reward_biased = T.outer(equalvec, reward_base) - reward_bias.dimshuffle('x', 0)
            # N * Time
            # (R_t - b_t), where b = E[R]
        
        # gradient descent
        gdobjective = logLoss.sum()/x.shape[0]  # correct * dim_output (only has value on the correctly predicted sample)
        gdparams = reduce(lambda x, y: x+y.params, layers, []) 
        gdupdates = map(lambda x: (x, x+lr*T.grad(gdobjective, x)), gdparams)

        # reinforce learning
        # without maximum, then -log(p) will decrease the p
        rlobjective = (T.maximum(reward_biased.dimshuffle(0, 1, 'x'), 0) * T.log(au.location_p)).sum() / correct 
            # location_p: N * Time * 2
            # location_logp: N * Time
            # reward_biased: N * 2
        rlparams = au.reinforceParams 
        rlupdates = map(lambda x: (x, x+lr*lmbd*T.grad(rlobjective, x)), rlparams)


        # Hidden state keeps unchange in time
        deltas = T.stack(*[((au.output[:,i,:].mean(0)-au.output[:,i+1,:].mean(0))**2).sum()  for i in xrange(glimpse_times-1)])
            # N * Time * dim_hidden
         
        print 'compile step()'
        self.step = theano.function([x, y, lr, reward_bias], [gdobjective, rlobjective, correct, T.outer(equalvec, reward_base)], updates=gdupdates+rlupdates)
    #       print 'compile gdstep()'
    #       self.gdstep = theano.function([x, y, lr], [gdobjective, correct, location], updates=gdupdates)
    #       print 'compile rlstep()'
    #       self.rlstep = theano.function([x, y, lr], [rlobjective], updates=rlupdates)
        print 'compile predict()'
        self.predict = theano.function([x], prediction)
        if DEBUG:
            print 'compile glimpse()'
            self.glimpse = theano.function([x], au.glimpse) #[layers[-3].output, fc.output])
            print 'compile innerstate()'
            self.getinnerstate = theano.function([x], au.innerstate)
            print 'compile forward()'
            self.forward = theano.function([x], map(lambda x: x.output, layers)) #[layers[-3].output, fc.output])
            print 'compile error()'
            self.error = theano.function([x, y, reward_bias], [gdobjective, rlobjective])
        print 'compile locate()'
        self.locate = theano.function([x], [au.location_mean, location]) #[layers[-3].output, fc.output])
        print 'compile debug()'
        self.debug = theano.function([x, y, lr, reward_bias], [deltas, au.location_p], on_unused_input='warn')


        # self.xxx
        self.layers = layers
        self.params = gdparams + rlparams
        self.glimpse_times = glimpse_times
Example #37
def in1d(arr, in_arr):
    """for each element in arr returns 1 if in_arr contains this element, otherwise 0
    Output shape matches arr shape, in_arr must be 1d"""

    return T.eq(arr.reshape([1, -1]),
                in_arr.reshape([-1, 1])).any(axis=0).reshape(arr.shape)
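
A usage sketch for in1d(), assuming it is defined as above in a module where theano.tensor is imported as T; the toy arrays are illustrative.

import theano
import theano.tensor as T

arr = T.imatrix('arr')
in_arr = T.ivector('in_arr')
member = theano.function([arr, in_arr], in1d(arr, in_arr), allow_input_downcast=True)
print(member([[1, 2], [3, 4]], [2, 3, 9]))
# [[0 1]
#  [1 0]]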
Example #38
File: discrete.py Project: zivtigher/pymc3
 def logp(self, value):
     c = self.c
     return bound(0, tt.eq(value, c))
Example #39
    def fit(self, X, y):
        import lasagne
        import theano
        import theano.tensor as T
        """Fit model."""
        # Symbolic variables for a minibatch: the input variable is taken from the network's
        # InputLayer (an assumption about how the network was built), and a target variable
        # replaces the bare `T.vector`, which is a constructor rather than a variable instance.
        input_var = lasagne.layers.get_all_layers(self.network)[0].input_var
        target_var = T.vector('targets')
        # Create a loss expression for training, i.e., a scalar objective we want
        # to minimize (for our multi-class problem, it is the cross-entropy loss):
        prediction = lasagne.layers.get_output(self.network)
        loss = lasagne.objectives.binary_crossentropy(prediction, target_var)
        loss = loss.mean()
        # We could add some weight decay as well here, see lasagne.regularization.

        # Create update expressions for training, i.e., how to modify the
        # parameters at each training step. Here, we'll use Stochastic Gradient
        # Descent (SGD) with Nesterov momentum, but Lasagne offers plenty more.
        params = lasagne.layers.get_all_params(self.network, trainable=True)
        updates = lasagne.updates.nesterov_momentum(loss,
                                                    params,
                                                    learning_rate=0.01,
                                                    momentum=0.9)

        # Create a loss expression for validation/testing. The crucial difference
        # here is that we do a deterministic forward pass through the network,
        # disabling dropout layers.
        test_prediction = lasagne.layers.get_output(self.network, input_var,
                                                    deterministic=True)
        test_loss = lasagne.objectives.categorical_crossentropy(
            test_prediction, target_var)
        test_loss = test_loss.mean()
        # As a bonus, also create an expression for the classification accuracy:
        test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                          dtype=theano.config.floatX)

        # Compile a function performing a training step on a mini-batch (by giving
        # the updates dictionary) and returning the corresponding training loss:
        train_fn = theano.function([input_var, target_var], loss, updates=updates)

        # Compile a second function computing the validation loss and accuracy:
        val_fn = theano.function([input_var, target_var], [test_loss, test_acc])

        # Hold out a simple validation split from the training data (a 90/10
        # split assumed here; the original split is not shown), since the loop
        # below reports validation loss and accuracy.
        split = int(0.9 * len(X))
        X_train, y_train = X[:split], y[:split]
        X_val, y_val = X[split:], y[split:]

        # Finally, launch the training loop.
        print("Starting training...")
        # We iterate over epochs:
        for epoch in range(self.model_params['num_epochs']):
            # In each epoch, we do a full pass over the training data:
            train_err = 0
            train_batches = 0
            start_time = time.time()
            for batch in iterate_minibatches(X_train,
                                             y_train,
                                             500,
                                             shuffle=True):
                inputs, targets = batch
                train_err += train_fn(inputs, targets)
                train_batches += 1

            # And a full pass over the validation data:
            val_err = 0
            val_acc = 0
            val_batches = 0
            for batch in iterate_minibatches(X_val, y_val, 500, shuffle=False):
                inputs, targets = batch
                err, acc = val_fn(inputs, targets)
                val_err += err
                val_acc += acc
                val_batches += 1

            # Then we print the results for this epoch:
            print("Epoch {} of {} took {:.3f}s".format(
                epoch + 1, self.model_params['num_epochs'],
                time.time() - start_time))
            print("  training loss:\t\t{:.6f}".format(train_err /
                                                      train_batches))
            print("  validation loss:\t\t{:.6f}".format(val_err / val_batches))
            print("  validation accuracy:\t\t{:.2f} %".format(
                val_acc / val_batches * 100))
            pass
Example #40
0
            lasagne.objectives.squared_error(prediction, train_prediction_b))
    #        loss=loss+pi_loss
    elif model.network_type == "tempens":
        # Tempens model loss:
        loss = T.mean(loss * mask_train, dtype=theano.config.floatX)
        loss += unsup_weight_var * T.mean(
            lasagne.objectives.squared_error(prediction, z_target_var))
    else:
        loss = T.mean(loss, dtype=theano.config.floatX)

    # regularization:L1,L2
    l2_penalty = lasagne.regularization.regularize_network_params(
        gru_network, lasagne.regularization.l2) * model.l2_loss
    loss = loss + l2_penalty

    train_acc = T.mean(T.eq(T.argmax(prediction, axis=1),
                            T.argmax(target_var, axis=1)),
                       dtype=theano.config.floatX)
    # We could add some weight decay as well here, see lasagne.regularization.

    # Create update expressions for training, i.e., how to modify the
    # parameters at each training step. Here, we'll use Stochastic Gradient
    # Descent (SGD) with Nesterov momentum, but Lasagne offers plenty more.
    params = lasagne.layers.get_all_params(gru_network, trainable=True)
    updates = lasagne.updates.adam(loss,
                                   params,
                                   learning_rate=learning_rate_var,
                                   beta1=adam_beta1_var)
    """
    3.test loss and accuracy
    """
    # Create a loss expression for validation/testing. The crucial difference
Example #41
0
output = architecture.buildDCNN()
dcnnParams = lasagne.layers.get_all_params(output)

# SYMBOLIC INPUTS
x = T.imatrix()
y = T.ivector()

# Without L2 Regularization 
loss = lasagne.objectives.aggregate(
    lasagne.objectives.categorical_crossentropy(
        lasagne.layers.get_output(output, x), y), mode = 'mean')
updates = lasagne.updates.adagrad(loss, dcnnParams, learning_rate = 0.1)

# ACCURACY FOR PREDICTIONS
prediction = T.argmax(lasagne.layers.get_output(output, x, deterministic=True), axis=1)
score = T.eq(prediction, y).mean()

# SYMBOLIC FUNCTIONS
trainDCNN = theano.function([x,y], outputs = loss, updates = updates)
validateDCNN = theano.function([x,y], outputs = score)
testDCNN = theano.function([x,y], outputs = score)

# LOAD THE DATA
trainingSentences = loader.loadData('myDataset/train.txt')
trainingLabels = loader.loadData('myDataset/train_label.txt')
validationSentences = loader.loadData('myDataset/dev.txt')
validationLabels = loader.loadData('myDataset/dev_label.txt')
testSentences = loader.loadData('myDataset/test.txt')
testLabels = loader.loadData('myDataset/test_label.txt')

# TRAIN THE MODEL
Example #42
0
 def get_output_mask(self, train=None):
     X = self.get_input(train)
     if not self.mask_zero:
         return None
     else:
         return T.ones_like(X) * (1 - T.eq(X, 0))
Example #43
0
def unk_ratio(words, mask, unk):
    num_unk = (tensor.eq(words, unk) * mask).sum()
    return num_unk / mask.sum()
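A quick usage sketch (hypothetical values; assumes the imports shown): only positions where the mask is on are counted, so two of the four masked-in tokens below are unk.

import numpy as np
import theano
from theano import tensor

words = tensor.imatrix('words')
mask = tensor.matrix('mask')
ratio_fn = theano.function([words, mask], unk_ratio(words, mask, unk=0))

w = np.array([[5, 0, 7], [0, 0, 9]], dtype='int32')
m = np.array([[1, 1, 1], [1, 0, 0]], dtype=theano.config.floatX)
print(ratio_fn(w, m))  # 2 unk tokens out of 4 masked-in tokens -> 0.5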
Example #44
0
big_frame_level_outputs, new_big_h0 = big_frame_level_rnn(big_input_sequences, big_h0, reset)  # tier3 -> tier2

frame_level_outputs, new_h0 = frame_level_rnn(input_sequences, big_frame_level_outputs, h0, reset)#tier2->tier1

prev_samples = sequences_8k[:,0:-(OVERLAP-FRAME_SIZE+1)]
prev_samples = prev_samples.reshape((1, batch_size, 1, -1))
prev_samples = T.nnet.neighbours.images2neibs(prev_samples, (1,  FRAME_SIZE), neib_step=(1, 1), mode='valid') #2-dim:([[x7,x8],[x8,x9],[x9,x10],...])
prev_samples = prev_samples.reshape((batch_size * SEQ_LEN,  FRAME_SIZE))


sample_level_outputs = sample_level_predictor(
    frame_level_outputs.reshape((batch_size * SEQ_LEN, DIM)),
    prev_samples
)          #sample_level_outputs dim:(BATCH_SIZE * SEQ_LEN, Q_LEVELS) -> [[x9pre],[x10pre],...]

accuracy = T.eq(lib.ops.softmax_and_no_sample(sample_level_outputs.reshape((batch_size, SEQ_LEN, Q_LEVELS))), target_sequences)
accuracy = accuracy * target_mask
accuracy = T.sum(accuracy, axis=1)
mask_sum = T.sum(target_mask, axis=1)

cost = T.nnet.categorical_crossentropy(
    T.nnet.softmax(sample_level_outputs),  #Every row represents a distribution(256 propability)
    target_sequences.flatten()    #A list, represent the groundtruth of every row
)
cost = cost.reshape(target_sequences.shape)
cost = cost * target_mask #dim: batch*num
# Don't use these lines; they could end up with NaN,
# especially at the end of audio files where the mask is
# all zero for some of the shorter files in the mini-batch.
#cost = cost.sum(axis=1) / target_mask.sum(axis=1)
#cost = cost.mean(axis=0)
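One hedged alternative (a sketch, not part of the original code): aggregate the masked cost over the whole mini-batch, so the denominator can only vanish if every mask entry in the batch is zero.

# cost has shape (batch, time) and is already multiplied by target_mask above;
# summing over the whole batch avoids the per-row zero denominators noted above.
safe_cost = cost.sum() / T.maximum(target_mask.sum(), 1e-5)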
Example #45
0
    def __init__(self,
                 input_width,
                 input_height,
                 num_actions,
                 num_frames,
                 discount,
                 learning_rate,
                 rho,
                 rms_epsilon,
                 momentum,
                 clip_delta,
                 freeze_interval,
                 batch_size,
                 network_type,
                 update_rule,
                 batch_accumulator,
                 rng,
                 input_scale=255.0):

        self.input_width = input_width
        self.input_height = input_height
        self.num_actions = num_actions
        self.num_frames = num_frames
        self.batch_size = batch_size
        self.discount = discount
        self.rho = rho
        self.lr = learning_rate
        self.rms_epsilon = rms_epsilon
        self.momentum = momentum
        self.clip_delta = clip_delta
        self.freeze_interval = freeze_interval
        self.rng = rng

        lasagne.random.set_rng(self.rng)

        self.update_counter = 0

        self.l_out = self.build_network(network_type, input_width,
                                        input_height, num_actions, num_frames,
                                        batch_size)
        if self.freeze_interval > 0:
            self.next_l_out = self.build_network(network_type, input_width,
                                                 input_height, num_actions,
                                                 num_frames, batch_size)
            self.reset_q_hat()

        states = T.tensor4('states')
        next_states = T.tensor4('next_states')
        rewards = T.col('rewards')
        actions = T.icol('actions')
        terminals = T.icol('terminals')

        # Shared variables for training from a minibatch of replayed
        # state transitions, each consisting of num_frames + 1 (due to
        # overlap) images, along with the chosen action and resulting
        # reward and terminal status.
        self.imgs_shared = theano.shared(
            np.zeros((batch_size, num_frames + 1, input_height, input_width),
                     dtype=theano.config.floatX))
        self.rewards_shared = theano.shared(np.zeros(
            (batch_size, 1), dtype=theano.config.floatX),
                                            broadcastable=(False, True))
        self.actions_shared = theano.shared(np.zeros((batch_size, 1),
                                                     dtype='int32'),
                                            broadcastable=(False, True))
        self.terminals_shared = theano.shared(np.zeros((batch_size, 1),
                                                       dtype='int32'),
                                              broadcastable=(False, True))

        # Shared variable for a single state, to calculate q_vals.
        self.state_shared = theano.shared(
            np.zeros((num_frames, input_height, input_width),
                     dtype=theano.config.floatX))

        q_vals = lasagne.layers.get_output(self.l_out, states / input_scale)

        if self.freeze_interval > 0:
            next_q_vals = lasagne.layers.get_output(self.next_l_out,
                                                    next_states / input_scale)
        else:
            next_q_vals = lasagne.layers.get_output(self.l_out,
                                                    next_states / input_scale)
            next_q_vals = theano.gradient.disconnected_grad(next_q_vals)

        terminalsX = terminals.astype(theano.config.floatX)
        actionmask = T.eq(
            T.arange(num_actions).reshape((1, -1)), actions.reshape(
                (-1, 1))).astype(theano.config.floatX)

        target = (rewards + (T.ones_like(terminalsX) - terminalsX) *
                  self.discount * T.max(next_q_vals, axis=1, keepdims=True))
        output = (q_vals * actionmask).sum(axis=1).reshape((-1, 1))
        diff = target - output

        if self.clip_delta > 0:
            # If we simply take the squared clipped diff as our loss,
            # then the gradient will be zero whenever the diff exceeds
            # the clip bounds. To avoid this, we extend the loss
            # linearly past the clip point to keep the gradient constant
            # in that regime.
            #
            # This is equivalent to declaring d loss/d q_vals to be
            # equal to the clipped diff, then backpropagating from
            # there, which is what the DeepMind implementation does.
            quadratic_part = T.minimum(abs(diff), self.clip_delta)
            linear_part = abs(diff) - quadratic_part
            loss = 0.5 * quadratic_part**2 + self.clip_delta * linear_part
        else:
            loss = 0.5 * diff**2

        if batch_accumulator == 'sum':
            loss = T.sum(loss)
        elif batch_accumulator == 'mean':
            loss = T.mean(loss)
        else:
            raise ValueError("Bad accumulator: {}".format(batch_accumulator))

        params = lasagne.layers.helper.get_all_params(self.l_out)
        train_givens = {
            states: self.imgs_shared[:, :-1],
            next_states: self.imgs_shared[:, 1:],
            rewards: self.rewards_shared,
            actions: self.actions_shared,
            terminals: self.terminals_shared
        }
        if update_rule == 'deepmind_rmsprop':
            updates = deepmind_rmsprop(loss, params, self.lr, self.rho,
                                       self.rms_epsilon)
        elif update_rule == 'rmsprop':
            updates = lasagne.updates.rmsprop(loss, params, self.lr, self.rho,
                                              self.rms_epsilon)
        elif update_rule == 'sgd':
            updates = lasagne.updates.sgd(loss, params, self.lr)
        else:
            raise ValueError("Unrecognized update: {}".format(update_rule))

        if self.momentum > 0:
            updates = lasagne.updates.apply_momentum(updates, None,
                                                     self.momentum)

        self._train = theano.function([], [loss],
                                      updates=updates,
                                      givens=train_givens)
        q_givens = {
            states:
            self.state_shared.reshape(
                (1, self.num_frames, self.input_height, self.input_width))
        }
        self._q_vals = theano.function([], q_vals[0], givens=q_givens)
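The clipped-delta construction above is the usual Huber-style loss. A small NumPy sketch (hypothetical TD errors, with clip_delta assumed to be 1.0) shows the quadratic/linear split; outside the clip region the gradient with respect to diff stays at +/- clip_delta instead of vanishing.

import numpy as np

clip_delta = 1.0
diff = np.array([0.3, 1.0, 2.5])  # hypothetical TD errors
quadratic_part = np.minimum(np.abs(diff), clip_delta)
linear_part = np.abs(diff) - quadratic_part
loss = 0.5 * quadratic_part ** 2 + clip_delta * linear_part
print(loss)  # [0.045 0.5   2.   ] -- quadratic inside the clip, linear outside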
Example #46
0
def main():

    print("Loading Data")
    X_train, y_train, X_valid, y_valid, X_test, y_test = load_data.load_data_feautre_train(feautre = u"\uBC18\uD314",root_path= "/home/prosurpa/Image/image/",image_size=(28,28))

    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')

    print("Bulding Model")

    batch_size = 20

    network = build_f_cnn(batch_size ,input_var)


    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean()

    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.nesterov_momentum(
        loss, params, learning_rate=0.01, momentum=0.9
    )


    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction, target_var)

    test_loss = test_loss.mean()
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var), dtype=theano.config.floatX)

    train_fn = theano.function([input_var, target_var], loss, updates=updates)

    val_fn = theano.function([input_var, target_var], [test_loss, test_acc])

    #model_rw.read_model_data(network, "75.0000009934model")

    print("Starting training")


    num_epochs = 1000
    best_acc = 75
    for epoch in range(num_epochs):
        train_err = 0
        train_batches = 0
        start_time = time.time()

        print((len(X_train)/batch_size))
        for batch in iterate_minibatches(X_train, y_train, batch_size, shuffle=True):
            inputs, targets = batch
            train_err += train_fn(inputs, targets)
            train_batches += 1
            if train_batches%20 == 0:
                print(train_batches)



        val_err = 0
        val_acc = 0
        val_batches = 0

        print((len(X_valid) / batch_size))
        for batch in iterate_minibatches(X_valid, y_valid, batch_size, shuffle=False):
            inputs, targets = batch
            err, acc = val_fn(inputs, targets)
            val_err += err
            val_acc += acc
            val_batches += 1
            if val_batches % 20 == 0:
                print(val_batches)


        print("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, num_epochs, time.time() - start_time))
        print("  training loss:\t\t{:.6f}".format(train_err / train_batches))
        print("  validation loss:\t\t{:.6f}".format(val_err / val_batches))
        print("  validation accuracy:\t\t{:.2f} %".format(
            val_acc / val_batches * 100))

        test_err = 0
        test_acc = 0
        test_batches = 0

        print((len(X_test) / batch_size))
        for batch in iterate_minibatches(X_test, y_test, batch_size, shuffle=False):
            inputs, targets = batch
            err, acc = val_fn(inputs, targets)
            test_err += err
            test_acc += acc
            test_batches += 1
            if test_batches % 20 == 0:
                print(test_batches)


        print("Final results:")
        print("  test loss:\t\t\t{:.6f}".format(test_err / test_batches))
        print("  test accuracy:\t\t{:.2f} %".format(
            test_acc / test_batches * 100))

        re_acc = test_acc / test_batches * 100

        if re_acc > best_acc + 0.5:
            best_acc = re_acc
            model_rw.write_model_data(network, str(best_acc) + "model")
Example #47
0
    def get_reward(self, session_states, session_actions, batch_id):
        """
        WARNING! this runs on a single session, not on a batch
        reward given for taking the action in current environment state
        arguments:
            session_states float[time, memory_id]: environment state before taking action
            session_actions int[time]: agent action at this tick
        returns:
            reward float[time]: reward for taking action from the given state
        """
        # unpack states and actions
        session_states = check_list(session_states)[0]
        session_actions = check_list(session_actions)[0]

        time_range = T.arange(session_actions.shape[0])

        has_tried_already = session_states[time_range, session_actions]
        session_is_active = T.eq(session_states[:, self.end_action_id], 0)

        has_finished_now = T.eq(session_actions, self.end_action_id)
        has_finished_now = T.set_subtensor(has_finished_now[-1], 1)
        end_tick = has_finished_now.nonzero()[0][0]

        action_is_categorical = in1d(session_actions, self.category_action_ids)

        response = self.joint_data[batch_id, session_actions].ravel()

        at_least_one_category_guessed = T.any(action_is_categorical[:end_tick]
                                              & (response[:end_tick] > 0))

        #categorical and attributes
        reward_for_intermediate_action = T.switch(
            action_is_categorical,
            response *
            (self.rw["category_positive"] - self.rw["category_negative"]) +
            self.rw["category_negative"],
            response *
            (self.rw["attribute_positive"] - self.rw["attribute_negative"]) +
            self.rw["attribute_negative"])
        reward_for_intermediate_action_first_time = T.switch(
            has_tried_already,
            self.rw["repeated_poll"],
            reward_for_intermediate_action,
        )

        #ending session
        reward_for_end_action = T.switch(
            at_least_one_category_guessed,  #if chosen at least 1 category
            self.rw["end_action"],  #do not penalize
            self.rw["end_action_if_no_category_predicted"])  #else punish

        #include end action
        reward_for_action = T.switch(
            has_finished_now,
            reward_for_end_action,
            reward_for_intermediate_action_first_time,
        )

        final_reward = T.switch(
            session_is_active,
            reward_for_action,
            0,
        )

        return final_reward.astype(theano.config.floatX)
Example #48
0
def train_cnn_for_el(train_data_file_name,
                     val_data_file_name,
                     num_val_candidates,
                     test_data_file_name,
                     num_test_candidates,
                     img_h, img_w,
                     all_words,  # first row of all_words should be a non-existing word
                     wid_idx_dict,
                     entity_vecs,
                     gold_as_first_candidate=False,
                     skip_width_loading=40,  # skip width while loading samples
                     n_epochs=25,
                     batch_size=50,
                     filter_hs=def_filter_hs,
                     num_feature_maps=100,
                     conv_non_linear="relu",
                     lr_decay=0.9,
                     sqr_norm_lim=9,
                     hidden_out_len=50,):
    rng = np.random.RandomState(3435)

    x = T.imatrix('x')
    # es = T.imatrix('es')
    # es_test = T.imatrix('es_test')
    entities = T.imatrix('entities')

    print 'making entity_vecs...', len(entity_vecs)
    shared_entity_vecs = theano.shared(value=np.asarray(entity_vecs, dtype=theano.config.floatX),
                                       name='entity_vecs', borrow=True)
    # shared_entity_vecs = theano.shared(value=np.asarray(entity_vecs, dtype=np.float32),
    #                                    name='entity_vecs', borrow=True)
    print 'making shared_words...', len(all_words)
    shared_words = theano.shared(value=np.asarray(all_words, dtype=theano.config.floatX),
                                 name='shared_words', borrow=True)
    print 'done'

    # test_contexts, test_indices = get_data_set_full(test_data_file_name, wid_idx_dict, skip_width_loading)
    # num_test_batches = test_indices.shape[0] / batch_size
    # num_val_contexts, val_contexts, val_indices = get_data_set_full(val_data_file_name,
    #                                                                 wid_idx_dict, skip_width_loading)
    val_contexts, val_indices = data_load.load_samples_full(val_data_file_name, wid_idx_dict, sentence_len,
                                                            sentence_pad_len,
                                                            skip_width=skip_width_loading,
                                                            num_candidates=num_val_candidates)
    num_val_batches = len(val_contexts) / batch_size
    print num_val_batches, 'validation batches'
    print len(val_indices[0]), 'candidates per mention'

    if gold_as_first_candidate:
        gold_labels = theano.shared(value=np.zeros(batch_size,
                                                   dtype='int32'),
                                    borrow=True)
    else:
        gold_labels = theano.shared(value=np.ones(batch_size,
                                                  dtype='int32'),
                                    borrow=True)

    val_contexts = T.cast(to_theano_shared(val_contexts), 'int32')
    val_indices = T.cast(to_theano_shared(val_indices), 'int32')

    filter_shapes = []
    pool_sizes = []
    filter_w = img_w
    for filter_h in filter_hs:
        filter_shapes.append((num_feature_maps, 1, filter_h, filter_w))
        pool_sizes.append((img_h - filter_h + 1, img_w - filter_w + 1))

    layer0_input = shared_words[x.flatten()].reshape((x.shape[0], 1, x.shape[1], shared_words.shape[1]))
    conv_layers = []
    layer1_inputs = []
    for i in xrange(len(filter_hs)):
        filter_shape = filter_shapes[i]
        pool_size = pool_sizes[i]
        conv_layer = LeNetConvPoolLayer(rng, input=layer0_input, image_shape=(batch_size, 1, img_h, img_w),
                                        filter_shape=filter_shape, poolsize=pool_size, non_linear=conv_non_linear)
        layer1_input = conv_layer.output.flatten(2)
        conv_layers.append(conv_layer)
        layer1_inputs.append(layer1_input)

    layer1_input = T.concatenate(layer1_inputs, 1)
    matcher0 = HiddenLayer(rng, layer1_input, num_feature_maps * len(filter_hs),
                           hidden_out_len, relu)
    mc = matcher0.output  # mention contexts

    unit_mc = mc / T.sqrt(T.maximum(T.sum(T.sqr(mc), 1), 0.0001)).dimshuffle(0, 'x')

    batch_entity_vecs = shared_entity_vecs[entities]
    matcher1 = HiddenLayer(rng, batch_entity_vecs, len(entity_vecs[0]), hidden_out_len, relu)
    entity_reps = matcher1.output
    # entity_reps = batch_entity_vecs

    unit_entity_reps = entity_reps / T.sqrt(T.maximum(T.sum(T.sqr(entity_reps), 2), 0.0001)).dimshuffle(0, 1, 'x')

    similarities = (unit_mc.dimshuffle(0, 'x', 1) * unit_entity_reps).sum(axis=2)
    correct_rate = T.mean(T.eq(gold_labels, T.argmax(similarities, axis=1)))

    loss = T.maximum(0, 1 - similarities[:, 0] + similarities[:, 1]).sum()

    # similarities = (mc.dimshuffle(0, 'x', 1) * batch_entity_vecs).sum(axis=2)  # / mc_norm

    params = matcher0.params + matcher1.params
    # params = matcher0.params
    for conv_layer in conv_layers:
        params += conv_layer.params
    grad_updates = sgd_updates_adadelta(params, loss, lr_decay, 1e-6, sqr_norm_lim)

    index = T.lscalar()

    # test_model = theano.function(
    #     [index],
    #     error_rate,
    #     givens={x: test_contexts[index * batch_size: (index + 1) * batch_size],
    #             es: test_indices[index * batch_size: (index + 1) * batch_size]}
    # )

    val_model = theano.function(
        [index],
        correct_rate,
        givens={x: val_contexts[index * batch_size: (index + 1) * batch_size],
                entities: val_indices[index * batch_size: (index + 1) * batch_size]}
    )

    train_contexts = theano.shared(
        value=np.zeros((3, 2)),
        borrow=True)
    int_train_contexts = T.cast(train_contexts, 'int32')
    train_indices = theano.shared(
        value=np.zeros((3, 2)),
        borrow=True)
    int_train_indices = T.cast(train_indices, 'int32')
    train_model = theano.function(
        [index],
        loss,
        updates=grad_updates,
        givens={x: int_train_contexts[index * batch_size: (index + 1) * batch_size],
                entities: int_train_indices[index * batch_size: (index + 1) * batch_size]}
    )

    fdebug = theano.function(
        [index],
        similarities,
        givens={x: int_train_contexts[index * batch_size: (index + 1) * batch_size],
                entities: int_train_indices[index * batch_size: (index + 1) * batch_size]}
    )
    # print fdebug(0)

    val_perfs = [val_model(i) for i in xrange(num_val_batches)]
    print('init val perf %f' % np.mean(val_perfs))

    print 'training ...'
    f_train = open(train_data_file_name, 'rb')
    epoch = 0
    while epoch < n_epochs:
        epoch += 1

        train_part_cnt = 0
        # num_train_contexts, cur_train_contexts, cur_train_indices = get_data_set_part(
        #     f_train, wid_idx_dict, 50000)
        cur_train_contexts, cur_train_indices = data_load.load_training_samples(f_train,
                                                                                training_part_size,
                                                                                wid_idx_dict,
                                                                                sentence_len,
                                                                                sentence_pad_len)
        while len(cur_train_contexts) > 0:
            train_contexts.set_value(cur_train_contexts, borrow=True)
            train_indices.set_value(cur_train_indices, borrow=True)
            # print fdebug(0)

            train_part_cnt += 1
            num_train_batches = len(cur_train_contexts) / batch_size
            # print 'num_train_batches', num_train_batches
            mean_loss = 0
            for minibatch_index in xrange(num_train_batches):
                cur_loss = train_model(minibatch_index)
                mean_loss += cur_loss
                # if (minibatch_index + 1) % (num_train_batches / 3) == 0:  # show some progress
                #     print minibatch_index, num_train_batches
            print 'loss:', mean_loss / num_train_batches
            # print fdebug(0)

            val_perfs = [val_model(i) for i in xrange(num_val_batches)]
            val_perf = np.mean(val_perfs)
            print('epoch %i, training part %i, val perf %f'
                  % (epoch, train_part_cnt, val_perf))
            cur_train_contexts, cur_train_indices = data_load.load_training_samples(f_train,
                                                                                    training_part_size,
                                                                                    wid_idx_dict,
                                                                                    sentence_len,
                                                                                    sentence_pad_len)
            # num_train_contexts, cur_train_contexts, cur_train_indices = get_data_set_part(
            #     f_train, wid_idx_dict, 50000)

    f_train.close()
Example #49
0
    def __init__(self,
                 embedding_dim=100,
                 num_hidden_layers=2,
                 hidden_dim=200,
                 in_dropout_p=0.2,
                 hidden_dropout_p=0.5,
                 hidden2out_dropout_p=0.5,
                 update_hyperparams={'learning_rate': 0.01}):
        self.embedding_dim = embedding_dim
        self.num_hidden_layers = num_hidden_layers
        self.hidden_dim = hidden_dim
        self.in_dropout_p = in_dropout_p
        self.hidden_dropout_p = hidden_dropout_p
        self.hidden2out_dropout_p = hidden2out_dropout_p
        self.update_hyperparameters = update_hyperparams

        print >> sys.stderr, 'Building computation graph for discriminator...'
        self.input_var = T.matrix('input')
        self.target_var = T.matrix('target')

        self.l_in = lasagne.layers.InputLayer(shape=(None, self.embedding_dim),
                                              input_var=T.tanh(self.input_var),
                                              name='l_in')
        self.l_in_dr = lasagne.layers.DropoutLayer(self.l_in,
                                                   self.in_dropout_p)
        self.l_prehid = lasagne.layers.batch_norm(
            lasagne.layers.DenseLayer(
                self.l_in_dr,
                num_units=self.hidden_dim,
                nonlinearity=lasagne.nonlinearities.leaky_rectify,
                W=lasagne.init.GlorotUniform(gain=leaky_relu_gain),
                name='l_prehid'))
        self.layers = [self.l_in, self.l_in_dr, self.l_prehid]
        for i in xrange(self.num_hidden_layers):
            l_hid_predr = lasagne.layers.DropoutLayer(self.layers[-1],
                                                      self.hidden_dropout_p)
            l_hid = lasagne.layers.batch_norm(
                lasagne.layers.DenseLayer(
                    l_hid_predr,
                    num_units=self.hidden_dim,
                    nonlinearity=lasagne.nonlinearities.leaky_rectify,
                    W=lasagne.init.GlorotUniform(gain=leaky_relu_gain),
                    name=('l_hid_%s' % i)))
            l_hid_sum = lasagne.layers.ElemwiseSumLayer(
                [self.layers[-1], l_hid])
            self.layers.append(l_hid_predr)
            self.layers.append(l_hid)
            self.layers.append(l_hid_sum)

        self.l_preout_predr = lasagne.layers.DropoutLayer(
            self.layers[-1], self.hidden2out_dropout_p)
        self.l_preout = lasagne.layers.batch_norm(
            lasagne.layers.DenseLayer(self.l_preout_predr,
                                      num_units=1,
                                      nonlinearity=None,
                                      name='l_preout'))
        self.l_out = lasagne.layers.NonlinearityLayer(
            self.l_preout,
            nonlinearity=lasagne.nonlinearities.sigmoid,
            name='l_out')

        self.prediction = lasagne.layers.get_output(self.l_out)
        self.loss = lasagne.objectives.binary_crossentropy(
            self.prediction, self.target_var).mean()
        self.accuracy = T.eq(T.ge(self.prediction, 0.5),
                             self.target_var).mean()

        self.params = lasagne.layers.get_all_params(self.l_out, trainable=True)
        self.updates = lasagne.updates.adam(self.loss, self.params,
                                            **update_hyperparams)

        print >> sys.stderr, 'Compiling discriminator...'
        self.train_fn = theano.function([self.input_var, self.target_var],
                                        [self.loss, self.accuracy],
                                        updates=self.updates)
        self.eval_fn = theano.function([self.input_var, self.target_var],
                                       [self.loss, self.accuracy])
Example #50
0
    def create_structure(self):
        """Creates the symbolic graph of this layer.

        The input is always 3-dimensional: the first dimension is the time step,
        the second dimension are the sequences, and the third dimension is the
        word projection. When generating text, there's just one sequence and one
        time step in the input.

        Sets self.output to a symbolic matrix that describes the output of this
        layer. Assumes that the shared variables have been passed using
        ``set_params()``.
        """

        layer_input = tensor.concatenate([x.output for x in self.input_layers],
                                         axis=2)
        preact = self._tensor_preact(layer_input, 'input')

        # Combine the first two dimensions so that softmax is taken
        # independently for each location, over the output classes. This
        # produces probabilities for the whole vocabulary.
        num_time_steps = preact.shape[0]
        num_sequences = preact.shape[1]
        output_size = preact.shape[2]
        preact = preact.reshape([num_time_steps * num_sequences, output_size])
        self.output_probs = tensor.nnet.softmax(preact)
        self.output_probs = self.output_probs.reshape(
            [num_time_steps, num_sequences, output_size])
        if self.network.mode.is_distribution():
            return

        # We should predict probabilities of the target outputs, i.e. the words
        # at the next time step.
        if self.network.mode.is_target_words():
            output_probs = self.output_probs
            target_class_ids = self.network.target_class_ids
        else:
            output_probs = self.output_probs[:-1]
            target_class_ids = self.network.class_input[1:]
            num_time_steps -= 1

        assert_op = tensor.opt.Assert(
            "Mismatch in mini-batch and target classes shape.")
        target_class_ids = assert_op(
            target_class_ids,
            tensor.eq(target_class_ids.shape[0], output_probs.shape[0]))
        target_class_ids = assert_op(
            target_class_ids,
            tensor.eq(target_class_ids.shape[1], output_probs.shape[1]))

        # An index to a flattened input matrix times the vocabulary size can be
        # used to index the same location in the output matrix. The class ID is
        # added to index the probability of that word.
        output_probs = output_probs.flatten()
        target_class_ids = target_class_ids.flatten()
        minibatch_size = target_class_ids.shape[0]
        num_classes = self.network.vocabulary.num_classes()
        output_probs = assert_op(
            output_probs,
            tensor.eq(output_probs.shape[0], minibatch_size * num_classes))
        target_indices = tensor.arange(minibatch_size) * num_classes
        target_indices += target_class_ids
        self.target_probs = output_probs[target_indices]
        self.target_probs = self.target_probs.reshape(
            [num_time_steps, num_sequences])
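To make the flattened-index arithmetic above concrete, a small NumPy sketch with hypothetical sizes: row i of the flattened probability matrix starts at offset i * num_classes, and adding the class ID selects the probability of the target word.

import numpy as np

num_classes = 4
# probabilities for three flattened (time step, sequence) positions
output_probs = np.array([[0.10, 0.20, 0.30, 0.40],
                         [0.70, 0.10, 0.10, 0.10],
                         [0.25, 0.25, 0.25, 0.25]])
target_class_ids = np.array([3, 0, 1])

flat = output_probs.flatten()
target_indices = np.arange(len(target_class_ids)) * num_classes + target_class_ids
print(flat[target_indices])  # [0.4  0.7  0.25]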
Example #51
0
 def accuracy(self, y):
     "Return the accuracy for the mini-batch."
     return T.mean(T.eq(y, self.y_out))
Example #52
0
    lasagne.layers.set_all_param_values(net['prob'], params)


n_batches_per_epoch = np.floor(n_training_samples/float(BATCH_SIZE))
n_test_batches = np.floor(n_val_samples/float(BATCH_SIZE))

x_sym = T.tensor4()
y_sym = T.ivector()

l2_loss = lasagne.regularization.regularize_network_params(net['prob'], lasagne.regularization.l2) * 5e-4

prediction_train = lasagne.layers.get_output(net['prob'], x_sym, deterministic=False)
loss = lasagne.objectives.categorical_crossentropy(prediction_train, y_sym)
loss = loss.mean()
loss += l2_loss
acc_train = T.mean(T.eq(T.argmax(prediction_train, axis=1), y_sym), dtype=theano.config.floatX)

prediction_test = lasagne.layers.get_output(net['prob'], x_sym, deterministic=True)
loss_val = lasagne.objectives.categorical_crossentropy(prediction_test, y_sym)
loss_val = loss_val.mean()
loss_val += l2_loss
acc = T.mean(T.eq(T.argmax(prediction_test, axis=1), y_sym), dtype=theano.config.floatX)

params = lasagne.layers.get_all_params(net['prob'], trainable=True)
learning_rate = theano.shared(np.float32(0.001))
updates = lasagne.updates.adam(loss, params, learning_rate=learning_rate)

train_fn = theano.function([x_sym, y_sym], [loss, acc_train], updates=updates)
val_fn = theano.function([x_sym, y_sym], [loss_val, acc])
pred_fn = theano.function([x_sym], prediction_test)
Example #53
0
def main(num_epochs=100):
    # Load the dataset
    print("Loading data...")
    datasets = load_data()
    X_train, y_train = datasets[0]
    X_val, y_val = datasets[1]
    X_test, y_test = datasets[2]
    # Prepare Theano variables for inputs and targets
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')
    learnrate = 0.01
    # Create neural network model (depending on first command line parameter)
    print("Building model and compiling functions...")

    network = build_cnn(input_var)
    # Create a loss expression for training, i.e., a scalar objective we want
    # to minimize (for our multi-class problem, it is the cross-entropy loss):
    l2_penalty = regularize_layer_params(network, l2)
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean() + 0.1 * l2_penalty
    # We could add some weight decay as well here, see lasagne.regularization.

    # Create update expressions for training, i.e., how to modify the
    # parameters at each training step. Here, we'll use Stochastic Gradient
    # Descent (SGD) with Nesterov momentum, but Lasagne offers plenty more.
    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.nesterov_momentum(loss,
                                                params,
                                                learning_rate=learnrate,
                                                momentum=0.9)

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network,
    # disabling dropout layers.
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(
        test_prediction, target_var)
    test_loss = test_loss.mean()
    # As a bonus, also create an expression for the classification accuracy:
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                      dtype=theano.config.floatX)

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_fn = theano.function([input_var, target_var], loss, updates=updates)

    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc])

    # Finally, launch the training loop.
    print("Starting training...")
    # We iterate over epochs:
    for epoch in range(num_epochs):
        # In each epoch, we do a full pass over the training data:
        train_err = 0
        train_batches = 0
        start_time = time.time()
        for batch in iterate_minibatches(X_train, y_train, 20, shuffle=False):
            inputs, targets = batch
            train_err += train_fn(inputs, targets)
            train_batches += 1

        # And a full pass over the validation data:
        val_err = 0
        val_acc = 0
        val_batches = 0
        for batch in iterate_minibatches(X_val, y_val, 20, shuffle=False):
            inputs, targets = batch
            err, acc = val_fn(inputs, targets)
            val_err += err
            val_acc += acc
            val_batches += 1

        # Then we print the results for this epoch:
        print("Epoch {} of {} took {:.3f}s".format(epoch + 1, num_epochs,
                                                   time.time() - start_time))
        print("  training loss:\t\t{:.6f}".format(train_err / train_batches))
        print("  validation loss:\t\t{:.6f}".format(val_err / val_batches))
        print("  validation accuracy:\t\t{:.2f} %".format(val_acc /
                                                          val_batches * 100))

    test_err = 0
    test_acc = 0
    test_batches = 0
    for batch in iterate_minibatches(X_test, y_test, 20, shuffle=False):
        inputs, targets = batch
        err, acc = val_fn(inputs, targets)
        test_err += err
        test_acc += acc
        test_batches += 1
    print("Final results:")
    print("  test loss:\t\t\t{:.6f}".format(test_err / test_batches))
    print("  test accuracy:\t\t{:.2f} %".format(test_acc / test_batches * 100))

    # Optionally, you could now dump the network weights to a file like this:
    np.savez('model2.npz', *lasagne.layers.get_all_param_values(network))
Example #54
0
    def compile_theano_functions(self, data_type='2D', loss='cross_entropy'):
        assert self.net is not None

        ### symbolic theano input
        theano_args = OrderedDict()
        dim = len(self.cf.dim)

        if data_type == '2D':
            assert dim == 2
            theano_args['X'] = T.tensor4()
            theano_args['y'] = T.tensor4()
            theano_args['c'] = T.ivector()
            self.logger.info('Net: Working with 2D data.')
            val_args = deepcopy(theano_args)
            train_args = deepcopy(theano_args)
            train_args['lr'] = T.scalar(name='lr')
            train_args['lw'] = T.scalar(name='lw')

            ### class prediction functions
            class_layer = self.net[self.cf.class_layer]
            class_train_prediction = get_output(class_layer,
                                                train_args['X'],
                                                deterministic=False)
            class_val_prediction = get_output(class_layer,
                                              val_args['X'],
                                              deterministic=True)
            attention_val_prediction = get_output(
                self.net[self.cf.attention_layer],
                val_args['X'],
                deterministic=True)

            self.class_predict_smax['train'] = theano.function(
                [train_args['X']], class_train_prediction)
            self.class_predict_smax['val'] = theano.function(
                [val_args['X']], class_val_prediction)
            self.attention_predict = theano.function([val_args['X']],
                                                     attention_val_prediction)

            # get flattened softmax prediction of shape (pixels, classes), where pixels = b*0*1
            prediction_train_smax_flat = get_output(
                self.net[self.cf.seg_out_layer_flat],
                train_args['X'],
                deterministic=False)
            prediction_val_smax_flat = get_output(
                self.net[self.cf.seg_out_layer_flat],
                val_args['X'],
                deterministic=True)

            # reshape softmax prediction: shapes (pixels,c) -> (b,c,0,1)
            prediction_train_smax = prediction_train_smax_flat.reshape(
                (train_args['X'].shape[0], self.cf.dim[0], self.cf.dim[1],
                 self.cf.num_classes)).transpose((0, 3, 1, 2))
            prediction_val_smax = prediction_val_smax_flat.reshape(
                (val_args['X'].shape[0], self.cf.dim[0], self.cf.dim[1],
                 self.cf.num_classes)).transpose((0, 3, 1, 2))
            self.predict_smax['train'] = theano.function([train_args['X']],
                                                         prediction_train_smax)
            self.predict_smax['val'] = theano.function([val_args['X']],
                                                       prediction_val_smax)

            # reshape target vector: shapes (b,c,0,1) -> (b*0*1,c)
            flat_target_train = train_args['y'].transpose(
                (0, 2, 3, 1)).reshape((-1, self.cf.num_classes))
            flat_target_val = val_args['y'].transpose((0, 2, 3, 1)).reshape(
                (-1, self.cf.num_classes))

        elif data_type == '3D':
            assert dim == 3
            theano_args['X'] = T.tensor5()
            theano_args['y'] = T.tensor5()
            theano_args['c'] = T.ivector()
            self.logger.info('Net: Working with 3D data.')
            val_args = deepcopy(theano_args)
            train_args = deepcopy(theano_args)
            train_args['lr'] = T.scalar(name='lr')

            ### prediction functions

            # get flattened softmax prediction of shape (pixels, classes), where pixels = b*0*1*2
            prediction_train_smax_flat = get_output(
                self.net[self.cf.seg_out_layer_flat],
                train_args['X'],
                deterministic=False)
            prediction_val_smax_flat = get_output(
                self.net[self.cf.seg_out_layer_flat],
                val_args['X'],
                deterministic=True)

            # reshape softmax prediction: shapes (pixels,c) -> (b,c,0,1,2)
            prediction_train_smax = prediction_train_smax_flat.reshape(
                (train_args['X'].shape[0], self.cf.dim[0], self.cf.dim[1],
                 self.cf.dim[2], self.cf.num_classes)).transpose(
                     (0, 4, 1, 2, 3))
            prediction_val_smax = prediction_val_smax_flat.reshape(
                (val_args['X'].shape[0], self.cf.dim[0], self.cf.dim[1],
                 self.cf.dim[2], self.cf.num_classes)).transpose(
                     (0, 4, 1, 2, 3))
            self.predict_smax['train'] = theano.function([train_args['X']],
                                                         prediction_train_smax)
            self.predict_smax['val'] = theano.function([val_args['X']],
                                                       prediction_val_smax)

            # reshape target vector: shapes (b,c,0,1,2) -> (b*0*1*2,c)
            flat_target_train = train_args['y'].transpose(
                (0, 2, 3, 4, 1)).reshape((-1, self.cf.num_classes))
            flat_target_val = val_args['y'].transpose((0, 2, 3, 4, 1)).reshape(
                (-1, self.cf.num_classes))

        pred_train_one_hot = get_one_hot_prediction(prediction_train_smax,
                                                    self.cf.num_classes)
        pred_val_one_hot = get_one_hot_prediction(prediction_val_smax,
                                                  self.cf.num_classes)
        self.predict_one_hot['val'] = theano.function([val_args['X']],
                                                      pred_val_one_hot)
        self.predict_one_hot['train'] = theano.function([train_args['X']],
                                                        pred_train_one_hot)

        prediction_val = T.argmax(prediction_val_smax, axis=1)
        prediction_train = T.argmax(prediction_train_smax, axis=1)
        self.predict['val'] = theano.function([val_args['X']], prediction_val)
        self.predict['train'] = theano.function([train_args['X']],
                                                prediction_train)

        ### evaluation metrics
        train_dices_hard = binary_dice_per_instance_and_class(
            pred_train_one_hot, train_args['y'], dim)
        val_dices_hard = binary_dice_per_instance_and_class(
            pred_val_one_hot, val_args['y'], dim)
        train_dices_soft = binary_dice_per_instance_and_class(
            prediction_train_smax, train_args['y'], dim)
        val_dices_soft = binary_dice_per_instance_and_class(
            prediction_val_smax, val_args['y'], dim)

        class_train_acc = T.mean(T.eq(T.argmax(class_train_prediction, axis=1),
                                      train_args['c']),
                                 dtype=theano.config.floatX)
        class_val_acc = T.mean(T.eq(T.argmax(class_val_prediction, axis=1),
                                    val_args['c']),
                               dtype=theano.config.floatX)

        ### loss types
        if loss == 'cross_entropy':
            self.loss['train'] = categorical_crossentropy(
                prediction_train_smax_flat, flat_target_train).mean()
            self.loss['val'] = categorical_crossentropy(
                prediction_val_smax_flat, flat_target_val).mean()

        if loss == 'weighted_cross_entropy':
            theano_args['w'] = T.fvector()
            train_args['w'] = T.fvector()
            train_loss = categorical_crossentropy(prediction_train_smax_flat,
                                                  flat_target_train)
            train_loss *= train_args['w']
            self.loss['train'] = train_loss.mean()

            val_args['w'] = T.fvector()
            val_loss = categorical_crossentropy(prediction_val_smax_flat,
                                                flat_target_val)
            val_loss *= val_args['w']
            self.loss['val'] = val_loss.mean()

        if loss == 'dice':
            self.loss['train'] = 1 - train_dices_soft.mean()
            self.loss['val'] = 1 - val_dices_soft.mean()
        self.logger.info('Net: Using {} loss.'.format(loss))

        if self.cf.use_weight_decay:
            training_loss = self.loss['train'] + \
                            self.cf.weight_decay * lasagne.regularization.regularize_network_params(
                                self.net[self.cf.seg_out_layer_flat],
                                lasagne.regularization.l2)
            self.logger.info('Net: Using weight decay of {}.'.format(
                self.cf.weight_decay))
        else:
            training_loss = self.loss['train']

        class_reg = lasagne.regularization.regularize_network_params(
            class_layer, lasagne.regularization.l2, {'trainable': True})
        self.class_loss['train'] = lasagne.objectives.categorical_crossentropy(
            class_train_prediction, train_args['c']).mean()
        self.class_loss['val'] = lasagne.objectives.categorical_crossentropy(
            class_val_prediction, val_args['c']).mean()

        training_loss += (
            self.class_loss['train'] +
            self.cf.class_weight_decay * class_reg) * train_args['lw']

        ### training functions
        params = set(
            get_all_params(self.net[self.cf.class_layer], trainable=True))
        params = params.union(
            set(
                get_all_params(self.net[self.cf.seg_out_layer_flat],
                               trainable=True)))
        params = list(params)
        grads = theano.grad(training_loss, params)
        updates = adam(grads, params, learning_rate=train_args['lr'])

        self.train_fn = theano.function(train_args.values(), [
            self.loss['train'], train_dices_hard, class_train_acc,
            self.class_loss['train'], training_loss
        ],
                                        updates=updates)
        self.val_fn = theano.function(val_args.values(), [
            self.loss['val'], val_dices_hard, class_val_acc,
            self.class_loss['val']
        ])

        self.logger.info('Net: Compiled theano functions.')
Example #55
0
 def value_single(self, x, y, f):
     ret = T.mean([T.min([1. - y + f[2], 1.]), T.min([1. - f[2] + y, 1.])])
     ret = T.cast(ret, dtype=theano.config.floatX)
     return T.cast(ifelse(T.eq(self.condition_single(x, f), 1.), ret, 1.),
                   dtype=theano.config.floatX)
Example #56
0
def main(dd):

    # load hyperparameters
    h, eta, grad_clip, len_sample, n_dir = read_hyp('hyp_{}'.format(dd))
    # load model
    print("Load Network")
    load_l_in = lasagne.layers.InputLayer(shape=(None, len_sample, 2))

    # slice the last step to extract the label
    load_l_forward_1 = lasagne.layers.LSTMLayer(
        load_l_in,
        h,
        grad_clipping=grad_clip,
        nonlinearity=lasagne.nonlinearities.tanh)

    #load_l_forward_2 = lasagne.layers.LSTMLayer(
    #    load_l_forward_1, h, grad_clipping=grad_clip,
    #    nonlinearity=lasagne.nonlinearities.tanh)

    load_l_forward_slice = lasagne.layers.SliceLayer(load_l_forward_1, -1, 1)

    load_l_out = lasagne.layers.DenseLayer(
        load_l_forward_slice,
        num_units=n_dir,
        W=lasagne.init.Normal(),
        nonlinearity=lasagne.nonlinearities.softmax)
    read_model_data(load_l_out, 'model_{}'.format(dd))

    target_values = T.ivector('target_output')

    network_output = lasagne.layers.get_output(load_l_out)
    network_act = lasagne.layers.get_output(load_l_forward_1)
    cost = T.nnet.categorical_crossentropy(network_output,
                                           target_values).mean()
    acc = T.mean(T.eq(T.argmax(network_output, axis=1), target_values),
                 dtype=theano.config.floatX)

    compute_cost = theano.function([load_l_in.input_var, target_values],
                                   [cost, acc, network_output, network_act],
                                   allow_input_downcast=True)

    # test
    perm = np.random.permutation(len(data_x))
    perm_data_x = data_x[perm[:n_test]]
    perm_data_y = data_y[perm[:n_test]]
    y_test = np.zeros(n_test)
    x_test = np.zeros((n_test, len_sample, 2))
    for i in range(n_test):
        y_test[i] = perm_data_y[i] - 1  # labels are 1-based; shift to 0-based
        x_test[i] = perm_data_x[i][0:len_sample]

    # test
    cost_test, acc_test, output_test, network_act_test = compute_cost(
        x_test, y_test)
    savemat('outputs_raw.mat', {
        'output': output_test,
        'input': x_test,
        'labels': y_test
    })
    dump_results((output_test, y_test, x_test), dd)
    print("Final test cost = {}, acc = {}".format(cost_test, acc_test))
Example #57
0
# Define loss function and metrics, and get an updates dictionary
X_sym = T.tensor4()
y_sym = T.ivector()

# We'll connect our output classifier to the last fully connected layer of the network
net['new_output'] = DenseLayer(net['pool5'],
                               num_units=8,
                               nonlinearity=softmax,
                               W=lasagne.init.Normal(0.01))

prediction = lasagne.layers.get_output(net['new_output'], X_sym)
loss = lasagne.objectives.categorical_crossentropy(prediction, y_sym)
loss = loss.mean()

acc = T.mean(T.eq(T.argmax(prediction, axis=1), y_sym),
             dtype=theano.config.floatX)

learning_rate = theano.shared(np.array(0.001, dtype=theano.config.floatX))
learning_rate_decay = np.array(0.3, dtype=theano.config.floatX)
updates = OrderedDict()

print("Setting learning rates...")
for name, layer in net.items():
    print(name)
    layer_params = layer.get_params(trainable=True)
    if name in ['new_output', 'fc1000']:
        layer_lr = learning_rate
    else:
        layer_lr = learning_rate / 10
    if name != 'fc1000':
Example #58
0
 def condition_single(self, x, f):
     return T.cast(T.eq(f[0], 1.), dtype=theano.config.floatX)
Example #59
0
def main():
    input_var = T.tensor3(name='inputs', dtype=theano.config.floatX)
    target_var = T.ivector(name='targets')

    layer_input = lasagne.layers.InputLayer(shape=(None, LENGTH, 1),
                                            input_var=input_var,
                                            name='input')

    layer_rnn = RecurrentLayer(layer_input,
                               NUM_UNITS,
                               nonlinearity=nonlinearities.tanh,
                               only_return_final=True,
                               W_in_to_hid=lasagne.init.Constant(1),
                               W_hid_to_hid=lasagne.init.Constant(2),
                               b=None,
                               name='RNN')
    W = layer_rnn.W_hid_to_hid
    U = layer_rnn.W_in_to_hid

    output = lasagne.layers.get_output(layer_rnn)
    output = output.mean(axis=1)
    prediction = T.switch(T.gt(output, 0), 1, -1)
    acc = T.eq(prediction, target_var)
    acc = acc.sum()
    # invert tanh (arctanh) to recover the pre-activation output,
    # with a small epsilon added for numerical stability
    epsilon = 1e-6
    prob = 0.5 * T.log((1 + output + epsilon) / (1 - output + epsilon))
    prob = nonlinearities.sigmoid(prob)
    loss = -0.5 * ((1 + target_var) * T.log(prob) +
                   (1 - target_var) * T.log(1 - prob))
    loss = loss.sum()

    batch_size = 100
    learning_rate = 0.01
    steps_per_epoch = 1000
    params = lasagne.layers.get_all_params(layer_rnn, trainable=True)
    updates = lasagne.updates.sgd(loss,
                                  params=params,
                                  learning_rate=learning_rate)
    train_fn = theano.function([input_var, target_var],
                               [loss, acc, W, U, output],
                               updates=updates)

    for epoch in range(10000):
        print 'Epoch %d (learning rate=%.4f)' % (epoch, learning_rate)
        loss = 0.0
        correct = 0.0
        num_back = 0
        for step in range(steps_per_epoch):
            x, y = get_batch(batch_size)
            err, corr, w, u, pred = train_fn(x, y)
            # print x
            # print y
            # print pred
            loss += err
            correct += corr
            num_inst = (step + 1) * batch_size
            # update log
            sys.stdout.write("\b" * num_back)
            log_info = 'inst: %d loss: %.4f, corr: %d, acc: %.2f%%, W: %.6f, U: %.6f' % (
                num_inst, loss / num_inst, correct, correct * 100 / num_inst,
                w.sum(), u.sum())
            sys.stdout.write(log_info)
            num_back = len(log_info)
            # raw_input()
        # update training log after each epoch
        sys.stdout.write("\b" * num_back)
        assert num_inst == batch_size * steps_per_epoch
        print 'inst: %d loss: %.4f, corr: %d, acc: %.2f%%' % (
            num_inst, loss / num_inst, correct, correct * 100 / num_inst)
Example #60
0
 def logp(self, value):
     mu = self.mu
     log_prob = bound(
         logpow(mu, value) - factln(value) - mu, mu >= 0, value >= 0)
     # Return zero when mu and value are both zero
     return tt.switch(1 * tt.eq(mu, 0) * tt.eq(value, 0), 0, log_prob)
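For reference, the bound expression above is the Poisson log-likelihood; the final tt.switch returns 0 when both mu and value are zero:

$\log p(x \mid \mu) = x \log \mu - \log(x!) - \mu, \qquad \mu \ge 0,\; x \in \{0, 1, 2, \dots\}$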