def generate_theano_func(args, network, penalty, input_dict, target_var):

    prediction = get_output(network, input_dict)

    # loss = T.mean( target_var * ( T.log(target_var) - prediction ))
    loss = T.mean(categorical_crossentropy(prediction, target_var))
    # loss += 0.0001 * sum (T.sum(layer_params ** 2) for layer_params in get_all_params(network) )
    # penalty = sum ( T.sum(lstm_param**2) for lstm_param in lstm_params )
    # penalty = regularize_layer_params(l_forward_1_lstm, l2)
    # penalty = T.sum(lstm_param**2 for lstm_param in lstm_params)
    # penalty = 0.0001 * sum (T.sum(layer_params ** 2) for layer_params in get_all_params(l_forward_1) )

    loss = loss + penalty

    params = get_all_params(network, trainable=True)

    if args.optimizer == "sgd":
        updates = sgd(loss, params, learning_rate=args.step)
    elif args.optimizer == "adagrad":
        updates = adagrad(loss, params, learning_rate=args.step)
    elif args.optimizer == "adadelta":
        updates = adadelta(loss, params, learning_rate=args.step)
    elif args.optimizer == "nesterov":
        updates = nesterov_momentum(loss, params, learning_rate=args.step)
    elif args.optimizer == "rms":
        updates = rmsprop(loss, params, learning_rate=args.step)
    elif args.optimizer == "adam":
        updates = adam(loss, params, learning_rate=args.step)
    else:
        raise "Need set optimizer correctly"

    test_prediction = get_output(network, input_dict, deterministic=True)
    # test_prediction = get_output(network, deterministic=True)
    # test_loss = T.mean( target_var * ( T.log(target_var) - test_prediction))
    test_loss = T.mean(categorical_crossentropy(test_prediction, target_var))

    train_fn = theano.function(
        [input1_var, input1_mask_var, input2_var, input2_mask_var, target_var],
        loss,
        updates=updates,
        allow_input_downcast=True,
    )

    if args.task == "sts":
        val_fn = theano.function(
            [input1_var, input1_mask_var, input2_var, input2_mask_var, target_var],
            [test_loss, test_prediction],
            allow_input_downcast=True,
        )

    elif args.task == "ent":
        # test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var), dtype=theano.config.floatX)
        test_acc = T.mean(categorical_accuracy(test_prediction, target_var))
        val_fn = theano.function(
            [input1_var, input1_mask_var, input2_var, input2_mask_var, target_var],
            [test_loss, test_acc],
            allow_input_downcast=True,
        )

    return train_fn, val_fn
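
A minimal driver sketch for the two compiled functions returned above (assuming the surrounding script defines the shared symbolic variables input1_var, input1_mask_var, input2_var, input2_mask_var and target_var, plus an args namespace and a hypothetical iterate_minibatches helper; none of these names come from the original snippet):

# Hypothetical usage of the compiled train/val functions; batch variables are placeholders.
train_fn, val_fn = generate_theano_func(args, network, penalty, input_dict, target_var)
for epoch in range(args.num_epochs):
    epoch_loss = 0.0
    for x1, m1, x2, m2, y in iterate_minibatches(train_data, args.batch_size):
        epoch_loss += train_fn(x1, m1, x2, m2, y)
    val_loss, val_out = val_fn(val_x1, val_m1, val_x2, val_m2, val_y)
    print("epoch %d: train loss %.4f, val loss %.4f" % (epoch, epoch_loss, val_loss))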
Example #2
 def __init__(self, lr, C, momentum):
     self.lr = lr
     self.C = C
     self.momentum = momentum
     self.X = T.tensor4('X')
     self.y = T.ivector('y')
     self.network = self._build()
     self.params = layers.get_all_params(self.network, trainable=True)
     reg = regularization.regularize_network_params(self.network, regularization.l2)
     reg /= layers.helper.count_params(self.network)
     # training set
     yDropProb = layers.get_output(self.network)
     self.trEqs = myUtils.basic.eqs(yDropProb, self.y)
     trCrossentropy = objectives.categorical_crossentropy(yDropProb, self.y)
     self.trCost = trCrossentropy.mean() + C * reg
     # validation and test sets
     yFullProb = layers.get_output(self.network, deterministic=True)
     self.vateEqs = myUtils.basic.eqs(yFullProb, self.y)
     vateCrossentropy = objectives.categorical_crossentropy(yFullProb, self.y)
     self.vateCost = vateCrossentropy.mean() + C * reg
     self.yPred = yFullProb
     # training function: input the training set, output the training loss and error
     updatesDict = updates.nesterov_momentum(self.trCost, self.params, lr, momentum)
     self.trainfn = myUtils.basic.makeFunc([self.X, self.y], [self.trCost, self.trEqs], updatesDict)
     # validation/test function: input the validation or test set, output the loss and error, without updating parameters
     self.vatefn = myUtils.basic.makeFunc([self.X, self.y], [self.vateCost, self.vateEqs], None)
Example #3
 def __init__(self, istrained, name=None, args=None):
     self.istrained = istrained
     self.X = T.tensor4('X')
     self.y = T.ivector('y')
     self.outprob = build_model(self.X)
     if self.istrained:
         params = cPickle.load(open(dataset_path + 'plain_cnn.pkl', 'r'))
         layers.set_all_param_values(self.outprob, params)
         self.yFullProb = layers.get_output(self.outprob, deterministic=True)
         self.predfn = makeFunc([self.X, ], [self.yFullProb, ], None)
     else:
         self.lr, self.C, self.momentum = args
         self.params = layers.get_all_params(self.outprob, trainable=True)
         reg = regularization.regularize_network_params(self.outprob, regularization.l2)
         reg /= layers.helper.count_params(self.outprob)
         # training set
         self.yDropProb = layers.get_output(self.outprob)
         trCrossentropy = objectives.categorical_crossentropy(self.yDropProb, self.y)
         self.trCost = trCrossentropy.mean() + self.C * reg
         # validation and test sets
         self.yFullProb = layers.get_output(self.outprob, deterministic=True)
         vateCrossentropy = objectives.categorical_crossentropy(self.yFullProb, self.y)
         self.vateCost = vateCrossentropy.mean() + self.C * reg
         # training function: input the training set, output the training loss and error
         updatesDict = updates.nesterov_momentum(self.trCost, self.params, self.lr, self.momentum)
         self.trainfn = makeFunc([self.X, self.y], [self.trCost, self.yDropProb], updatesDict)
         # validation/test function: input the validation or test set, output the loss and error, without updating parameters
         self.vatefn = makeFunc([self.X, self.y], [self.vateCost, self.yFullProb], None)
Example #4
def get_model(input_var, target_var, multiply_var):

    # input layer with unspecified batch size
    layer     = InputLayer(shape=(None, 30, 64, 64), input_var=input_var) #InputLayer(shape=(None, 1, 30, 64, 64), input_var=input_var)
    layer     = DimshuffleLayer(layer, (0, 'x', 1, 2, 3))

    # Z-score?

    # Convolution then batchNormalisation then activation layer, then zero padding layer followed by a dropout layer
    layer         = batch_norm(Conv3DDNNLayer(incoming=layer, num_filters=16, filter_size=(3,3,3), stride=(1,1,1), pad='same', nonlinearity=rectify))
    layer         = batch_norm(Conv3DDNNLayer(incoming=layer, num_filters=16, filter_size=(3,3,3), stride=(1,1,1), pad='same', nonlinearity=rectify))
    layer         = batch_norm(Conv3DDNNLayer(incoming=layer, num_filters=1, filter_size=(3,3,3), stride=(1,1,1), pad='same', nonlinearity=rectify))
    layer_prediction  = layer

    # Loss
    prediction           = get_output(layer_prediction)
    loss                 = categorical_crossentropy(prediction.flatten(), target_var.flatten())

    #Updates : Stochastic Gradient Descent (SGD) with Nesterov momentum
    params               = get_all_params(layer_prediction, trainable=True)

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network, disabling dropout layers.
    test_prediction      = get_output(layer_prediction, deterministic=True)
    test_loss            = categorical_crossentropy(test_prediction.flatten(), target_var.flatten())

    return test_prediction, prediction, loss, params
Example #5
def tied_neighbours(preds, n_sample_preds, n_classes):
    eps = 1e-8
    #preds = T.clip(preds, eps, 1-eps)
    preds_per_trial_row = preds.reshape((-1, n_sample_preds, n_classes))
    earlier_neighbours = preds_per_trial_row[:, :-1]
    later_neighbours = preds_per_trial_row[:, 1:]
    # Have to now ensure first values are larger zero
    # for numerical stability :/
    # Example of problem otherwise:
    """
    a = T.fmatrix()
    b = T.fmatrix()
    soft_out_a =softmax(a)
    soft_out_b =softmax(b)
    
    loss = categorical_crossentropy(soft_out_a[:,1:],soft_out_b[:,:-1])
    neigh_fn = theano.function([a,b], loss)
    
    neigh_fn(np.array([[0,1000,0]], dtype=np.float32), 
        np.array([[0.1,0.9,0.3]], dtype=np.float32))
    -> inf
    """

    # renormalize(?)

    earlier_neighbours = (T.gt(earlier_neighbours, eps) * earlier_neighbours +
                          T.le(earlier_neighbours, eps) * earlier_neighbours +
                          eps)
    loss = categorical_crossentropy(earlier_neighbours, later_neighbours)
    return loss
Example #6
    def get_cost_U(self, image_input):
        print('getting_cost_U')
        prob_ys_given_x = self.classifier.get_output_for(
            self.classifier_helper.get_output_for(image_input))
        '''
        label_input_with = []
	for i in xrange(self.num_classes):
                label_input_with.append(self.convert_onehot(T.zeros([image_input.shape[0]], dtype='int64') + i))

        cost_L_with = []
	for i in xrange(self.num_classes):
                cost_L_with.append(self.get_cost_L([image_input, label_input_with[i]]))

        weighted_cost_L = T.zeros([image_input.shape[0],])
        for i in xrange(self.num_classes):
                weighted_cost_L += prob_ys_given_x[:, i] * cost_L_with[i]
        '''

        weighted_cost_L = T.zeros([
            image_input.shape[0],
        ])
        for i in xrange(self.num_classes):
            label_input = T.zeros([image_input.shape[0], self.num_classes])
            label_input = T.set_subtensor(label_input[:, i], 1)
            cost_L = self.get_cost_L([image_input, label_input])
            weighted_cost_L += prob_ys_given_x[:, i] * cost_L

        entropy_y_given_x = objectives.categorical_crossentropy(
            prob_ys_given_x, prob_ys_given_x)
        cost_U = weighted_cost_L - entropy_y_given_x

        return cost_U
Example #7
    def get_cost_test(self, inputs):
        image_input, label_input = inputs
        prob_ys_given_x = self.classifier.get_output_for(self.classifier_helper.get_output_for(image_input))
        cost_test = objectives.categorical_crossentropy(prob_ys_given_x, label_input)
        cost_acc = T.eq(T.argmax(prob_ys_given_x, axis=1), T.argmax(label_input, axis=1))

        return cost_test.mean(), cost_acc.mean()
Example #8
    def get_cost_U(self, image_input):
        print('getting_cost_U')
        prob_ys_given_x = self.classifier.get_output_for(self.classifier_helper.get_output_for(image_input))

        '''
        label_input_with = []
	for i in xrange(self.num_classes):
                label_input_with.append(self.convert_onehot(T.zeros([image_input.shape[0]], dtype='int64') + i))

        cost_L_with = []
	for i in xrange(self.num_classes):
                cost_L_with.append(self.get_cost_L([image_input, label_input_with[i]]))

        weighted_cost_L = T.zeros([image_input.shape[0],])
        for i in xrange(self.num_classes):
                weighted_cost_L += prob_ys_given_x[:, i] * cost_L_with[i]
        '''

        weighted_cost_L = T.zeros([image_input.shape[0],])
        for i in xrange(self.num_classes):
            label_input = T.zeros([image_input.shape[0], self.num_classes])
            label_input = T.set_subtensor(label_input[:, i], 1)
            cost_L = self.get_cost_L([image_input, label_input])
            weighted_cost_L += prob_ys_given_x[:,i] * cost_L

        entropy_y_given_x = objectives.categorical_crossentropy(prob_ys_given_x, prob_ys_given_x)
        cost_U = weighted_cost_L - entropy_y_given_x

        return cost_U
    def _get_train_fun(self):
        output_probs = get_output(self.net['l_dist'])   # "long" 2d matrix with prob distribution

        # cut off the first ids from every id sequence: they correspond to START_TOKEN, that we are not predicting
        target_ids = self.net['l_in_y'].input_var[:, 1:]
        target_ids_flattened = target_ids.flatten()               # "long" vector with target ids

        cost = categorical_crossentropy(
            predictions=output_probs,
            targets=target_ids_flattened
        ).mean()

        all_params = get_all_params(self.net['l_dist'], trainable=True)

        _logger.info("Computing train updates...")
        updates = lasagne.updates.adadelta(
            loss_or_grads=cost,
            params=all_params,
            learning_rate=LEARNING_RATE
        )

        _logger.info("Compiling train function...")
        train_fun = theano.function(
            inputs=[self.net['l_in_x'].input_var, self.net['l_in_y'].input_var],
            outputs=cost,
            updates=updates
        )

        return train_fun
Example #10
def set_network_predictor(input_data,
                          input_mask,
                          target_data,
                          target_mask,
                          network):

    # get network output data
    predict_data = get_output(network, deterministic=True)

    # get prediction index
    predict_idx = T.argmax(predict_data, axis=-1)

    # get prediction cost
    predict_cost = categorical_crossentropy(predictions=T.reshape(predict_data, (-1, predict_data.shape[-1]))+eps,
                                            targets=T.flatten(target_data, 1))
    predict_cost = predict_cost*T.flatten(target_mask, 1)
    predict_cost = predict_cost.sum()/target_mask.sum()

    # get prediction function
    predict_fn = theano.function(inputs=[input_data,
                                         input_mask,
                                         target_data,
                                         target_mask],
                                 outputs=[predict_idx,
                                          predict_cost], allow_input_downcast=True)

    return predict_fn
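
A short usage sketch (the compiled function takes numpy arrays matching the four symbolic inputs; the batch array names below are illustrative, and eps is assumed to be a small module-level constant such as 1e-8):

# Hypothetical call site for the compiled predictor.
predict_fn = set_network_predictor(input_data, input_mask,
                                   target_data, target_mask, network)
pred_idx, pred_cost = predict_fn(batch_x, batch_x_mask, batch_y, batch_y_mask)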
Example #11
    def __build_loss_train__fn__(self):
        # create loss function
        prediction = layers.get_output(self.net)
        loss = objectives.categorical_crossentropy(prediction,
                                                   self.__target_var__)
        loss = loss.mean() + 1e-4 * regularization.regularize_network_params(
            self.net, regularization.l2)

        val_acc = T.mean(T.eq(T.argmax(prediction, axis=1),
                              self.__target_var__),
                         dtype=theano.config.floatX)

        # create parameter update expressions
        params = layers.get_all_params(self.net, trainable=True)
        self.eta = theano.shared(sp.array(sp.float32(0.05), dtype=sp.float32))
        update_rule = updates.nesterov_momentum(loss,
                                                params,
                                                learning_rate=self.eta,
                                                momentum=0.9)

        # compile training function that updates parameters and returns training loss
        self.__train_fn__ = theano.function(
            [self.__input_var__, self.__target_var__],
            loss,
            updates=update_rule)
        self.__predict_fn__ = theano.function(
            [self.__input_var__],
            layers.get_output(self.net, deterministic=True))
        self.__val_fn__ = theano.function(
            [self.__input_var__, self.__target_var__], [loss, val_acc])
Example #12
    def _get_train_fun(self):
        output_probs = get_output(self.net['l_dist'])   # "long" 2d matrix with prob distribution

        input_ids = T.imatrix()
        # cut off the first ids from every id sequence: they correspond to START_TOKEN, that we are not predicting
        target_ids = input_ids[:, 1:]
        target_ids_flattened = target_ids.flatten()               # "long" vector with target ids

        cost = categorical_crossentropy(
            predictions=output_probs,
            targets=target_ids_flattened
        ).mean()

        all_params = get_all_params(self.net['l_dist'], trainable=True)

        print("Computing train updates...")
        updates = lasagne.updates.adadelta(
            loss_or_grads=cost,
            params=all_params,
            learning_rate=LEARNING_RATE
        )

        print("Compiling train function...")
        train_fun = theano.function(
            inputs=[self.net['l_in_x'].input_var, self.net['l_in_y'].input_var, input_ids],
            outputs=cost,
            updates=updates
        )

        return train_fun
Example #13
        def create_theano_loss(d):
            X, t = T.dmatrix('X'), T.dvector('t')
            log_sigma2 = theano.shared(np.ones((num_classes, d)))
            theta = theano.shared(np.random.randn(num_classes, d))

            # Change parametrization
            log_alpha = log_sigma2 - T.log(theta**2)
            la, alpha = log_alpha, T.exp(log_alpha)

            # -KL(q || prior)
            mD_KL = -(0.5 * T.log1p(T.exp(-la)) -
                      (0.03 + 1.0 /
                       (1.0 + T.exp(-(1.5 * (la + 1.3)))) * 0.64)).sum()

            # NLL through Local Reparametrization
            mu, si = T.dot(X, theta.T), T.sqrt(
                T.dot(X * X, (alpha * theta * theta).T))
            activation = mu + self._srng.normal(mu.shape, avg=0, std=1) * si
            predictions = T.nnet.softmax(activation)
            ell = -T.sum(
                categorical_crossentropy(predictions, one_hot(t, num_classes)))

            # Objective Negative SGVLB
            nlb = -(N / batch_size * ell + mD_KL)

            # Optimization Method and Function Compiling
            opt = lasagne.updates.adam(nlb, [log_sigma2, theta],
                                       learning_rate=lr,
                                       beta1=beta)
            lbf = function([X, t], nlb, updates=opt)

            return lbf, theta, log_sigma2
 def sensitivityBinaryCrossentropy(self, data,  batchSize = 128):
     """
     Returns the sensitivity of the categorical crossentropy with respect
     to the input data.
     
     :param data: Input data.
     :param batchSize: The network iterates through the dataset in
         batches whose size is given by this parameter. The labels used
         for the cross-entropy are the network's own predictions.
     """
     sens = np.zeros(data.shape)
     labelMatrix = T.ivector('labelVector')
     # Compute number of batches
     numBatches = int(np.ceil(float(sens.shape[0]) / float(batchSize)))
     startBatch = 0
     inputLayer = self.network.layers_[0].input_var
     output = get_output(self.network.layers_[-1], deterministic=True)
     score = categorical_crossentropy(output, labelMatrix).sum()
     calculatedGradients = theano.grad(score,inputLayer)
     for i in range(numBatches):
         endBatch = startBatch + batchSize
         if endBatch >= sens.shape[0]:
             endBatch = sens.shape[0]
             batchSize = endBatch - startBatch
         inputData = data[startBatch:endBatch].reshape(batchSize, 
                             data.shape[1], data.shape[2], data.shape[3])
         pred = output.eval({inputLayer: inputData}).argmax(axis=1)
         sens[startBatch:endBatch] = \
             calculatedGradients.eval({inputLayer: inputData, 
                                       labelMatrix: pred.astype('int32')})
         startBatch = endBatch        
     return sens
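
A rough usage sketch (assuming the class wraps a trained nolearn-style network exposing network.layers_ as above, and images is a 4-D numpy array; the object name is a placeholder):

# Hypothetical call: the sensitivity map has the same shape as the input batch.
sens_map = model.sensitivityBinaryCrossentropy(images, batchSize=64)
assert sens_map.shape == images.shape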
Example #15
 def __init__(self, C, lr):
     self.C = C
     self.X = T.ftensor4()
     self.Y = T.fmatrix()
     self.net = self._forward()
     params = layers.get_all_params(self.net['flatten'], trainable=True)
     netout = layers.get_output(self.net['out'])
     flattenout = layers.get_output(self.net['flatten'])
     reg = regularization.regularize_network_params(self.net['flatten'],
                                                    regularization.l2)
     reg /= layers.helper.count_params(self.net['flatten'])
     self.flattenfn = theano.function([self.X],
                                      flattenout,
                                      allow_input_downcast=True)
     self.predictfn = theano.function([self.X],
                                      netout,
                                      allow_input_downcast=True)
     accuracy = myUtils.basic.accuracy(netout, self.Y)
     self.scorefn = theano.function([self.X, self.Y],
                                    accuracy,
                                    allow_input_downcast=True)
     self.sharedBeta = self.net['out'].get_params()[0]
     crossentropy = objectives.categorical_crossentropy(netout, self.Y)
     cost = T.mean(crossentropy) + C * reg
     updatesDict = updates.nesterov_momentum(cost, params, lr, 0.9)
     # train the randomly initialized parameters
     self.trainfn = theano.function([self.X, self.Y], [cost, accuracy],
                                    updates=updatesDict,
                                    allow_input_downcast=True)
Example #16
    def compileValFunction(self):

        message = 'Compiling the Validation Function'
        self.logger.info(logMessage('+', message))

        startTime = time.time()

        valPrediction = get_output(self.outputLayer, 
                                     deterministic = True,
                                     batch_norm_update_averages=False, 
                                     batch_norm_use_averages=False)
        # TODO: check whether the flattened target variable and the output are flattened in the same order.
        self.flattenedTargetVar = T.flatten(self.targetVar)

        valLoss = categorical_crossentropy(valPrediction, self.flattenedTargetVar).mean()
        weightNorm = regularize_network_params(self.outputLayer, lasagne.regularization.l2)
        valLoss += self.weightDecay * weightNorm

        valPredictionLabel = T.argmax(valPrediction, axis = 1)
        valACC = T.mean(T.eq(valPredictionLabel, self.flattenedTargetVar), 
                        dtype = theano.config.floatX)

        valFunc = theano.function([self.inputVar, self.targetVar], 
                                  [valLoss, valACC])
        
        message = 'Compiled the Validation Function, spent {:.2f}s'.format(time.time()- startTime)
        self.logger.info(logMessage('+', message))

        return valFunc
Example #17
def multi_task_loss(y, t):
    cross_entropy = categorical_crossentropy(y[:, :num_class], t)
    regress_predictions = discrete_predict(y[:, -1])
    mse = squared_loss(regress_predictions, t)
    log_loss = cross_entropy.mean()
    reg_loss = mse.mean()
    return log_loss, reg_loss, log_loss + 3 * reg_loss
def compile_train_model(config):

    # build the training model
    train_batch_size = config['train_batch_size']  #number of bags
    bag_size = config['bag_size']
    input_var_train = T.tensor4('input_var_train')
    target_var = T.ivector('targets')
    train_network = build_train_model(train_batch_size, bag_size,
                                      input_var_train)

    learning_rate = theano.shared(np.float32(config['learning_rate']))
    classification_scores = lasagne.layers.get_output(train_network['prob'])
    debug_output = lasagne.layers.get_output(train_network['attention'])

    params = lasagne.layers.get_all_params(train_network['fc'], trainable=True)

    loss = T.mean(categorical_crossentropy(classification_scores, target_var))
    grads = T.grad(loss, params)

    # scale up the gradients of the later layers; assign back into the list,
    # since rebinding the loop variable alone would have no effect
    for index, grad in enumerate(grads):
        if index > 25:
            grads[index] = grad * 10.0

    y_pred = T.argmax(classification_scores, axis=1)
    error = T.mean(T.neq(y_pred, target_var))
    updates = lasagne.updates.nesterov_momentum(grads, params, learning_rate)

    train_model = theano.function([input_var_train, target_var], [loss, error],
                                  updates=updates)

    return train_network, train_model, learning_rate
Example #19
    def compileTrainFunction(self):
        message = 'Compiling the Training Function'
        self.logger.info(logMessage('+', message))

        startTime = time.time()

        trainPrediction = get_output(self.outputLayer, 
                                     deterministic = False,
                                     batch_norm_update_averages=False, 
                                     batch_norm_use_averages=False)
        # TODO: check whether the flattened target variable and the output are flattened in the same order.
        self.flattenedTargetVar = T.flatten(self.targetVar)

        trainLoss = categorical_crossentropy(trainPrediction, self.flattenedTargetVar).mean()
        weightNorm = regularize_network_params(self.outputLayer, lasagne.regularization.l2)
        trainLoss += self.weightDecay * weightNorm

        trainPredictionLabel = T.argmax(trainPrediction, axis = 1)
        trainACC = T.mean(T.eq(trainPredictionLabel, self.flattenedTargetVar), 
                          dtype = theano.config.floatX)
        
        params = get_all_params(self.outputLayer, trainable = True)
        update = self.optimizer(trainLoss, params, learning_rate = self.learningRate)

        trainFunc = theano.function([self.inputVar, self.targetVar], 
                                    [trainLoss, trainACC], 
                                    updates = update)
        
        message = 'Compiled the Training Function, spent {:.2f}s'.format(time.time()- startTime)
        self.logger.info(logMessage('+', message))

        return trainFunc
Example #20
def compute_loss_tbptt(network, target_data, target_mask, is_first_win, delay,
                       context):
    o = get_output(network, deterministic=False)

    n_batch, n_seq, n_feat = o.shape

    if delay:
        o, target_data, target_mask = delayed_tbptt(o, target_data,
                                                    target_mask, is_first_win,
                                                    delay)
    elif context:
        o, target_data, target_mask = context_tbptt(o, target_data,
                                                    target_mask, context)

    ce = categorical_crossentropy(predictions=T.reshape(o, (-1, o.shape[-1]),
                                                        ndim=2),
                                  targets=T.flatten(target_data, 1))

    ce = ce * T.flatten(target_mask, 1)
    ce_cost = ce.sum() / n_batch
    ce_frame_sum = ce.sum()

    pred_idx = T.argmax(o, axis=-1)

    return ce_cost, ce_frame_sum, pred_idx
def multi_task_loss(y, t):
    cross_entropy = categorical_crossentropy(y[:, :num_class], t)
    regress_predictions = discrete_predict(y[:, -1])
    mse = squared_loss(regress_predictions, t)
    log_loss = cross_entropy.mean()
    reg_loss = mse.mean()
    return log_loss, reg_loss, log_loss + 3 * reg_loss
Example #22
def loss_acc(model, input_var, target_var, deterministic=True):
    """Calculate the loss/error and accuracy of a model.

    Parameters
    ----------
    model : a :class:`Layer` instance
        The model to evaluate.
    input_var : theano symbolic variable
        A variable representing the network input.
    target_var : theano symbolic variable
        A variable representing the desired network
        output.
    deterministic : boolean (``True``)
        Use deterministic mode (for testing) or not (for training).

    Returns
    -------
    theano symbolic variable (scalar)
        The categorical cross-entropy.
    theano symbolic variable (scalar)
        The accuracy.
    """
    prediction = get_output(model, inputs=input_var,
                            deterministic=deterministic)
    loss = categorical_crossentropy(prediction, target_var)
    acc = tensor.eq(tensor.argmax(prediction, axis=1), target_var)
    return tensor.mean(loss), tensor.mean(acc, dtype=config.floatX)
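
A small sketch of how loss_acc might be wired into compiled Theano functions (the update rule and variable names here are assumptions, not part of the snippet):

# Hypothetical: compile train and validation functions from the symbolic loss/accuracy.
train_loss, train_acc = loss_acc(model, input_var, target_var, deterministic=False)
val_loss, val_acc = loss_acc(model, input_var, target_var, deterministic=True)

params = get_all_params(model, trainable=True)
updates = lasagne.updates.adam(train_loss, params, learning_rate=1e-3)

train_fn = theano.function([input_var, target_var], [train_loss, train_acc], updates=updates)
val_fn = theano.function([input_var, target_var], [val_loss, val_acc])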
Example #23
def tied_neighbours(preds, n_sample_preds, n_classes):
    eps = 1e-8
    #preds = T.clip(preds, eps, 1-eps)
    preds_per_trial_row = preds.reshape((-1, n_sample_preds, n_classes))
    earlier_neighbours = preds_per_trial_row[:,:-1]
    later_neighbours = preds_per_trial_row[:,1:]
    # Have to now ensure first values are larger zero
    # for numerical stability :/
    # Example of problem otherwise:
    """
    a = T.fmatrix()
    b = T.fmatrix()
    soft_out_a =softmax(a)
    soft_out_b =softmax(b)
    
    loss = categorical_crossentropy(soft_out_a[:,1:],soft_out_b[:,:-1])
    neigh_fn = theano.function([a,b], loss)
    
    neigh_fn(np.array([[0,1000,0]], dtype=np.float32), 
        np.array([[0.1,0.9,0.3]], dtype=np.float32))
    -> inf
    """
    
    # renormalize(?)
    
    earlier_neighbours = (T.gt(earlier_neighbours, eps) * earlier_neighbours + 
        T.le(earlier_neighbours, eps) * earlier_neighbours + eps)
    loss = categorical_crossentropy(earlier_neighbours, later_neighbours)
    return loss
Example #24
        def objective(layers_, target, **kwargs):
            out_a_layer = layers_['output_a']
            out_b_layer = layers_['output_b']

            # Get the outputs
            out_a, out_b = get_output([out_a_layer, out_b_layer])

            # Get the targets
            gt_a = T.cast(target[:, 0], 'int32')
            gt_b = target[:, 1].reshape((-1, 1))

            # Calculate the multi task loss
            cls_loss = aggregate(categorical_crossentropy(out_a, gt_a))
            reg_loss = aggregate(categorical_crossentropy(out_b, gt_b))
            loss = cls_loss + reg_loss
            return loss
Example #25
def test_maxpool_layer():
    l_in1 = InputLayer((None, 2))
    l_in2 = InputLayer((None, 20))
    l_hid = DenseLayer(l_in2, num_units=30, nonlinearity=rectify)
    l_pool = MaxpoolLayer([l_in1, l_hid])
    l_out = DenseLayer(l_pool, num_units=1, nonlinearity=sigmoid)

    bounds = theano.tensor.lmatrix('bounds')
    data = theano.tensor.matrix('data')
    targets = theano.tensor.matrix('targets')

    predictions = get_output(l_out, {l_in1: bounds, l_in2: data})
    loss = categorical_crossentropy(predictions, targets)
    loss = aggregate(loss, mode='mean')

    params = get_all_params(l_out)
    updates_sgd = sgd(loss, params, learning_rate=0.0001)

    train_function = theano.function([bounds, data, targets], updates=updates_sgd, allow_input_downcast=True)

    test_bounds = np.array([[0, 3], [3, 5], [5, 7]])
    test_X = np.random.randn(10, 20)
    test_Y = np.array([[0], [1], [0]])

    train_function(test_bounds, test_X, test_Y)
Example #26
def adversarial_training(model, inputs, labels, epsilon):
    logits = model(inputs)
    fast_grad_perturbation = fast_gradient_perturbation(
        inputs, logits, labels, epsilon)
    logits_adversarial = model(inputs + fast_grad_perturbation)
    loss = categorical_crossentropy(logits_adversarial, labels)
    return loss
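
The snippet above relies on a fast_gradient_perturbation helper that is not shown; a minimal sketch of what such a helper could look like, assuming the usual fast-gradient-sign formulation (this is an illustration, not the original project's implementation):

# Hypothetical FGSM-style perturbation: epsilon times the sign of the loss gradient w.r.t. the inputs.
def fast_gradient_perturbation(inputs, logits, labels, epsilon):
    loss = categorical_crossentropy(logits, labels).mean()
    grad = theano.grad(loss, inputs)
    return epsilon * T.sgn(grad)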
Example #27
    def _get_train_fn(self):
        output_probs = get_output(self._net['dist_nolast'])

        mask = get_output(self._net['input_y_mask'])[:, 1:].flatten()
        nonpad_ids = mask.nonzero()

        target_ids = get_output(self._net['target'])
        loss_per_object = categorical_crossentropy(predictions=output_probs, targets=target_ids)
        loss = loss_per_object[nonpad_ids].mean()

        all_params = get_all_params(self._net['dist'], trainable=True)

        _logger.info('Computing train updates...')
        updates = lasagne.updates.adadelta(loss_or_grads=loss, params=all_params, learning_rate=self._learning_rate)

        _logger.info('Compiling train function...')

        train_fn = theano.function(
            inputs=[
                self._net['input_x'].input_var, self._net['input_y'].input_var,
                self._net['input_condition_id'].input_var
            ],
            givens={
                self._net['hid_states_decoder'].input_var:
                    T.zeros((self._batch_size, self._decoder_depth, self._hidden_layer_dim)),
                self._net['thought_vector'].input_var:
                    self._default_thoughts_vector,
                self._net['switch_enc_to_tv']:
                    np.cast[np.int32](False)  # Doesn't compile without explicit casting here
            },
            outputs=loss,
            updates=updates)
        return train_fn
Example #28
        def objective(layers_, target, **kwargs):
            out_a_layer = layers_['output_a']
            out_b_layer = layers_['output_b']

            # Get the outputs
            out_a, out_b = get_output([out_a_layer, out_b_layer])

            # Get the targets
            gt_a = T.cast(target[:, 0], 'int32')
            gt_b = target[:, 1].reshape((-1, 1))

            # Calculate the multi task loss
            cls_loss = aggregate(categorical_crossentropy(out_a, gt_a))
            reg_loss = aggregate(categorical_crossentropy(out_b, gt_b))
            loss = cls_loss + reg_loss
            return loss
Example #29
    def compile_train_predict(self, stochastic_train, stochastic_predict):
        # symbolic functions to compute marginal posterior GP
        input_vars = self.post_gp.data_variables
        gp_hyperparams = self.post_gp.params
        self.gp_hyperparams = gp_hyperparams

        mu = self.post_gp.mean()
        mu = mu.dimshuffle('x', 0)   # make a row out of 1d vector (N to 1xN)

        self.train_network = self.extend_network(mu, stochastic_train)

        train_predict = lasagne.layers.get_output(self.train_network)

        # Compute the exepcted prediction
        #if stochastic_train and self.n_samples > 1:
        #    train_predict = train_predict.mean(axis=0, keepdims=True)

        label = T.ivector('label')

        # For expected loss
        if stochastic_train:
            label_rep = label.repeat(self.n_samples)
        else:
            label_rep = label

        loss = categorical_crossentropy(train_predict, label_rep).mean()
        # For expected prediction
        #loss = categorical_crossentropy(train_predict, label).mean()
        if self.regularize_weight > 0:
            penalty = (self.regularize_weight *
                       regularize_network_params(self.train_network, l2))
            loss += penalty

        params = lasagne.layers.get_all_params(self.train_network,
                                               trainable=True)
        update_params = params
        if self.update_gp:
            update_params += gp_hyperparams
        grad_loss = theano.grad(loss, update_params,
                                consider_constant=input_vars)
        updates = self.optimizer(grad_loss, update_params,
                                 **self.optimizer_kwargs)
        self.train_fn = theano.function(input_vars + [label],
                                        loss, updates=updates)

        if stochastic_train == stochastic_predict:
            self.test_network = self.train_network
            self.copy_params = False
        else:
            self.test_network = self.extend_network(mu, stochastic_predict)
            self.copy_params = True

        # Set deterministic=True for dropout training if used.
        test_predict = lasagne.layers.get_output(self.test_network,
                                                 deterministic=True)
        if stochastic_predict and self.n_samples > 1:
            test_predict = test_predict.mean(axis=0, keepdims=True)

        self.predict_fn = theano.function(input_vars, test_predict)
def train_setup():

    x = T.tensor3('input')
    y = T.lvector('output')

    network = cnn(x, config.input_length, config.output_length)

    print 'Number of Parameters {0}'.format(count_params(network))

    if config.init_model is not None:

        with np.load(config.init_model) as f:
            param_values = [f['arr_%d' % i] for i in range(len(f.files))]

        set_all_param_values(network, param_values)

    # training tasks in sequence

    prediction = get_output(network)

    ent = categorical_crossentropy(prediction, y)
    ent = ent.mean()

    l1_norm = config.l1_weight * regularize_network_params(network, l1)
    l2_norm = config.l2_weight * regularize_network_params(network, l2)

    total_error = ent + l1_norm + l2_norm

    params = get_all_params(network, trainable=True)

    updates = adadelta( total_error, params, config.learning_rate, \
                                             config.rho, \
                                             config.eps )

    train_fn = function( [x, y], [ent, l1_norm, l2_norm, prediction], \
                              updates = updates, \
                              allow_input_downcast = True )

    val_prediction = get_output(network, deterministic=True)
    val_ent = categorical_crossentropy(val_prediction, y)
    val_ent = val_ent.mean()

    val_fn = function([x, y], [val_ent, val_prediction],
                      allow_input_downcast=True)

    return network, train_fn, val_fn
Example #31
def set_network_trainer(input_data,
                        input_mask,
                        target_data,
                        target_mask,
                        network,
                        updater,
                        learning_rate,
                        grad_max_norm=10.,
#                        l2_lambda=1e-5,
                        load_updater_params=None):

    # get network output data
    predict_data = get_output(network, deterministic=False)

    predict_idx = T.argmax(predict_data, axis=-1)

    # get prediction cost
    train_predict_cost = categorical_crossentropy(predictions=T.reshape(predict_data, (-1, predict_data.shape[-1])) + eps,
                                                  targets=T.flatten(target_data, 1))
    train_predict_cost = train_predict_cost*T.flatten(target_mask, 1)
    train_predict_cost = train_predict_cost.sum()/target_mask.sum()

    # get regularizer cost
    train_regularizer_cost = regularize_network_params(network, penalty=l2)

    # get network parameters
    network_params = get_all_params(network, trainable=True)

    # get network gradients with clipping
    # the l2-regularized gradient below is dead code (l2_lambda is commented out
    # in the signature above), so only the plain prediction cost is differentiated
    # network_grads = theano.grad(cost=train_predict_cost + train_regularizer_cost*l2_lambda,
    #                             wrt=network_params)
    network_grads = theano.grad(cost=train_predict_cost,
                                wrt=network_params)
    network_grads, network_grads_norm = total_norm_constraint(tensor_vars=network_grads,
                                                              max_norm=grad_max_norm,
                                                              return_norm=True)


    # set updater
    train_lr = theano.shared(lasagne.utils.floatX(learning_rate))
    train_updates, trainer_params = updater(loss_or_grads=network_grads,
                                            params=network_params,
                                            learning_rate=train_lr,
                                            load_params_dict=load_updater_params)

    # get training (update) function
    training_fn = theano.function(inputs=[input_data,
                                          input_mask,
                                          target_data,
                                          target_mask],
                                  outputs=[predict_data,
                                           predict_idx,
                                           train_predict_cost,
                                           train_regularizer_cost,
                                           network_grads_norm],
                                  updates=train_updates, allow_input_downcast=True)
Example #32
    def compute_cost(self, deterministic=False):
        output = get_output(self.net, deterministic=deterministic)

        cost = categorical_crossentropy(output, self.tg).mean()
        cost.name = 'negll'

        accuracy = categorical_accuracy(output, self.tg).mean()
        accuracy.name = 'accuracy'

        return cost, accuracy
Example #33
    def compute_cost(self, deterministic=False):
        output = get_output(self.net, deterministic=deterministic)

        cost = categorical_crossentropy(output, self.tg).mean()
        cost.name = 'negll'

        accuracy = categorical_accuracy(output, self.tg).mean()
        accuracy.name = 'accuracy'

        return cost, accuracy
Example #34
    def get_cost_test(self, inputs):
        image_input, label_input = inputs
        prob_ys_given_x = self.classifier.get_output_for(
            self.classifier_helper.get_output_for(image_input))
        cost_test = objectives.categorical_crossentropy(
            prob_ys_given_x, label_input)
        cost_acc = T.eq(T.argmax(prob_ys_given_x, axis=1),
                        T.argmax(label_input, axis=1))

        return cost_test.mean(), cost_acc.mean()
Example #35
def create_update(nnet):
    """ create an SVM loss for network given in argument
    """

    inputs = T.tensor4('inputs')
    targets = T.ivector('targets')

    C = Cfg.C
    floatX = Cfg.floatX

    svm_layer = nnet.svm_layer

    trainable_params = lasagne.layers.get_all_params(svm_layer, trainable=True)

    prediction = lasagne.layers.get_output(svm_layer,
                                           inputs=inputs,
                                           deterministic=False)

    if Cfg.softmax_loss:
        print("Using softmax output")
        out = lasagne.nonlinearities.softmax(prediction)

        train_loss = l_objectives.categorical_crossentropy(out, targets).mean()
        train_acc = T.mean(T.eq(T.argmax(prediction, axis=1), targets),
                           dtype='floatX')
    else:
        objective, train_acc = svm_layer.objective(prediction, targets)

        train_loss = T.cast((objective) / targets.shape[0], 'floatX')
        train_acc = T.cast(train_acc * 1. / targets.shape[0], 'floatX')

    # NB: biases in L2-regularization
    l2_penalty = 0
    for layer in nnet.trainable_layers:
        l2_penalty = l2_penalty + T.sum(layer.W**2) + T.sum(layer.b**2)

    train_obj = floatX(0.5) / C * l2_penalty + train_loss

    updates = get_updates(nnet, train_obj, trainable_params)

    nnet.backprop = theano.function([inputs, targets], [train_obj, train_acc],
                                    updates=updates)

    nnet.hinge_loss = theano.function([inputs, targets],
                                      [train_loss, train_acc])

    prediction = lasagne.layers.get_output(svm_layer,
                                           inputs=inputs,
                                           deterministic=True)
    objective, test_acc = svm_layer.objective(prediction, targets)
    test_loss = T.cast(objective / targets.shape[0], 'floatX')
    test_acc = T.cast(test_acc * 1. / targets.shape[0], 'floatX')
    test_obj = floatX(0.5) / C * l2_penalty + test_loss

    nnet.forward = theano.function([inputs, targets], [test_obj, test_acc])
    def __init__(self, x, y, args):
        self.params_theta = []
        self.params_lambda = []
        self.params_weight = []
        if args.dataset == 'mnist':
            input_size = (None, 28 * 28)
        elif args.dataset == 'cifar10':
            input_size = (None, 3, 32 * 32)
        else:
            raise AssertionError
        layers = [ll.InputLayer(input_size)]
        penalty = theano.shared(np.array(0.))
        for (k, num) in enumerate(args.MLPlayer):
            # the last layer should use softmax
            if k == len(args.MLPlayer) - 1:
                # layers.append(ll.DenseLayer(layers[-1], num, nonlinearity=nonlinearities.softmax))
                layers.append(
                    DenseLayerWithReg(args,
                                      layers[-1],
                                      num_units=num,
                                      nonlinearity=nonlinearities.softmax))
            else:
                # layers.append(ll.DenseLayer(layers[-1], num))
                layers.append(
                    DenseLayerWithReg(args, layers[-1], num_units=num))
            if layers[-1].W is not None:
                self.params_theta += [layers[-1].W, layers[-1].b]
                self.params_weight += [layers[-1].W]

                # define new regularization term for a layer
                if args.regL2 is True:
                    tempL2 = layers[-1].L2 * T.sqr(
                        layers[-1].W
                    )  #Michael: use 10**regularization constants
                    penalty += T.sum(tempL2)
                    self.params_lambda += [layers[-1].L2]
                if args.regL1 is True:
                    tempL1 = layers[-1].L1 * T.abs_(
                        layers[-1].W
                    )  #Michael: use 10**regularization constants
                    penalty += T.sum(tempL1)
                    self.params_lambda += [layers[-1].L1]

        self.layers = layers
        self.y = ll.get_output(layers[-1], x, deterministic=False)
        self.prediction = T.argmax(self.y, axis=1)
        self.penalty = penalty
        # self.penalty = penalty if penalty != 0. else T.constant(0.)
        print(self.params_lambda)
        # time.sleep(20)
        # cost function
        self.loss = T.mean(categorical_crossentropy(self.y, y))
        self.lossWithPenalty = T.add(self.loss, self.penalty)
        print("loss and losswithpenalty", type(self.loss),
              type(self.lossWithPenalty))
Example #37
def build_model0(input_var,target_var,regularW=0,params_load=None):
    network=layers.InputLayer(shape=(None,3,256,256),input_var=input_var)
    # size 256*256
    network=layers.Pool2DLayer(network,pool_size=(2,2),stride=2,pad=0,mode='average_inc_pad')
    #size 128*128
    network=layers.Pool2DLayer(network,pool_size=(2,2),stride=2,pad=0,mode='average_inc_pad')
    #size 64*64
    network=layers.Conv2DLayer(network,num_filters=32,filter_size=(5,5),
                               nonlinearity=nonLinear.leaky_rectify,
                               W=init.GlorotUniform(gain='relu'),pad='same'
                               )
    
    network=layers.MaxPool2DLayer(network,pool_size=(2,2))
    network=layers.DropoutLayer(network,p=0.15)
    #size 32*32
    network=layers.Conv2DLayer(network,num_filters=64,filter_size=(5,5),
                               nonlinearity=nonLinear.leaky_rectify,
                               W=init.GlorotUniform(gain='relu'),pad='same'
                               )
    
    network=layers.MaxPool2DLayer(network,pool_size=(2,2))
    network=layers.DropoutLayer(network,p=0.2)
    #size 16*16
    network=layers.Conv2DLayer(network,num_filters=128,filter_size=(5,5),
                               nonlinearity=nonLinear.leaky_rectify,
                               W=init.GlorotUniform(gain='relu'),pad='same'
                               )
    
    network=layers.MaxPool2DLayer(network,pool_size=(2,2))
    network=layers.DropoutLayer(network,p=0.3)
    #size 8*8
    network=layers.Conv2DLayer(network,num_filters=256,filter_size=(5,5),
                               nonlinearity=nonLinear.leaky_rectify,
                               W=init.GlorotUniform(gain='relu'),pad='same'
                               )
    
    network=layers.MaxPool2DLayer(network,pool_size=(2,2))
    network=layers.DropoutLayer(network,p=0.4)
    #size 4*4
    network = layers.GlobalPoolLayer(network)    
    network=layers.DenseLayer(network,num_units=1000,
                              nonlinearity=nonLinear.leaky_rectify,
                              W=init.GlorotUniform(gain='relu'))
    network=layers.DenseLayer(network,num_units=2,
                              nonlinearity=nonLinear.softmax)
    prediction=layers.get_output(network)
    loss = objectives.categorical_crossentropy(prediction, target_var)
    loss=loss.mean()
    
    params=layers.get_all_params(network,trainable=True)        
    if params_load is not None:
        [p.set_value(pval) for (p, pval) in zip(params, params_load)]
    
    return network,loss,params
Example #38
File: ctc.py  Project: choko/ctc
	def compute_cost(rnn_outputs, forward_probabilities, backward_pointers, x_end, y_end, label):
		def backward_step(backlinks, position):
			new_position = backlinks[position]
			return new_position, position

		initial_state = T.argmax(forward_probabilities[x_end-1,y_end-2:y_end]) + y_end - 2

		results, _ = theano.scan(fn = backward_step, sequences = backward_pointers[0:x_end,:], outputs_info = [initial_state, None], go_backwards = True)
		alignment = label[results[1][::-1]]

		return aggregate(categorical_crossentropy(rnn_outputs[0:x_end], alignment), mode='sum')
Example #39
def tied_losses(preds, n_sample_preds, n_classes, n_pairs):
    preds_per_trial_row = preds.reshape((-1, n_sample_preds, n_classes))
    _srng = RandomStreams(get_rng().randint(1, 2147462579))
    rand_inds = _srng.choice([n_pairs  * 2], n_sample_preds, replace=False)
    part_1 = preds_per_trial_row[:,rand_inds[:n_pairs]]
    part_2 = preds_per_trial_row[:,rand_inds[n_pairs:]]
    # Have to now ensure first values are larger zero
    # for numerical stability :/
    eps = 1e-4
    part_1 = T.maximum(part_1, eps)
    loss = categorical_crossentropy(part_1, part_2)
    return loss
Example #40
 def create_test_function(self):
     """ Create Test Function
     """
     test_prediction = lasagne.layers.get_output(self.network,
                                                 deterministic=True)
     test_loss = categorical_crossentropy(test_prediction,
                                          self.target_var).mean()
     test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1),
                            self.target_var),
                       dtype=theano.config.floatX)
     self.test = theano.function([self.input_var, self.target_var],
                                 [test_loss, test_acc])
Example #41
def tied_losses(preds, n_sample_preds, n_classes, n_pairs):
    preds_per_trial_row = preds.reshape((-1, n_sample_preds, n_classes))
    _srng = RandomStreams(get_rng().randint(1, 2147462579))
    rand_inds = _srng.choice([n_pairs * 2], n_sample_preds, replace=False)
    part_1 = preds_per_trial_row[:, rand_inds[:n_pairs]]
    part_2 = preds_per_trial_row[:, rand_inds[n_pairs:]]
    # Have to now ensure first values are larger zero
    # for numerical stability :/
    eps = 1e-4
    part_1 = T.maximum(part_1, eps)
    loss = categorical_crossentropy(part_1, part_2)
    return loss
    def __init__(self, x, y, args):
        self.params_theta = []
        self.params_lambda = []
        self.params_weight = []
        if args.dataset == 'mnist':
            input_size = (None, 1, 28, 28)
        elif args.dataset == 'cifar10':
            input_size = (None, 3, 32, 32)
        else:
            raise AssertionError
        layers = [ll.InputLayer(input_size)]
        self.penalty = theano.shared(np.array(0.))

        #conv1
        layers.append(Conv2DLayerWithReg(args, layers[-1], 20, 5))
        self.add_params_to_self(args, layers[-1])
        layers.append(ll.MaxPool2DLayer(layers[-1], pool_size=2, stride=2))
        #conv1
        layers.append(Conv2DLayerWithReg(args, layers[-1], 50, 5))
        self.add_params_to_self(args, layers[-1])
        layers.append(ll.MaxPool2DLayer(layers[-1], pool_size=2, stride=2))

        # Michael: add dropout
        layers.append(ll.DropoutLayer(layers[-1]))  # Michael
        #fc1
        layers.append(DenseLayerWithReg(args, layers[-1], num_units=500))
        self.add_params_to_self(args, layers[-1])
        layers.append(ll.DropoutLayer(layers[-1]))  # Michael
        #softmax
        layers.append(
            DenseLayerWithReg(args,
                              layers[-1],
                              num_units=10,
                              nonlinearity=nonlinearities.softmax))
        self.add_params_to_self(args, layers[-1])
        # no dropout on output

        self.layers = layers
        self.y = ll.get_output(layers[-1], x, deterministic=False)
        self.prediction = T.argmax(self.y, axis=1)
        # self.penalty = penalty if penalty != 0. else T.constant(0.)
        print(self.params_lambda)
        # time.sleep(20)
        # cost function
        self.loss = T.mean(categorical_crossentropy(self.y, y))
        self.lossWithPenalty = T.add(self.loss, self.penalty)
        print("loss and losswithpenalty", type(self.loss),
              type(self.lossWithPenalty))


# Michael: wide resnet: https://gist.github.com/FlorianMuellerklein/3d9ba175038a3f2e7de3794fa303f1ee
# https://github.com/FlorianMuellerklein/Identity-Mapping-ResNet-Lasagne/blob/master/models.py
Example #43
def create_spotlight_fn(final_layer, blur_axes, free_axes, weight_axes, trials_shape):
    ones_shape = [trials_shape[i_ax] if i_ax in blur_axes + free_axes else 1 
                  for i_ax in xrange(len(trials_shape))]

    means_stds_shape = [trials_shape[i_ax] if i_ax in free_axes else 1 
                  for i_ax in xrange(len(trials_shape))]
    means_stds_shape = [len(blur_axes)] + means_stds_shape
    #toadd: mixture of gaussians
    full_mask = T.ones(ones_shape, dtype=np.float32)
    broadcast_pattern = [True if ax not in (free_axes) else False 
                         for ax in xrange(len(trials_shape))]
    broadcast_pattern = [False] + broadcast_pattern

    means = theano.shared((np.ones(means_stds_shape)* 0.5).astype(np.float32),
                          broadcastable=broadcast_pattern)
    stds = theano.shared((np.ones(means_stds_shape)* 1).astype(np.float32),
                          broadcastable=broadcast_pattern)

    for i_blur_axis, axis in enumerate(blur_axes):
        ax_mask = T.constant(np.linspace(0,1, trials_shape[axis], dtype=np.float32))
        dimshuffle_pattern = [0 if ax == axis else 'x' for ax in xrange(len(trials_shape))]
        ax_mask = ax_mask.dimshuffle(*dimshuffle_pattern)
        # todo maybe have to fix this here?
        ax_gaussian = T.exp(-T.square((ax_mask - means[i_blur_axis]) / stds[i_blur_axis]) * 0.5)
        full_mask = full_mask * ax_gaussian
    
    weights_shape = [trials_shape[i_ax] if i_ax in weight_axes else 1 
                  for i_ax in xrange(1,len(trials_shape))]
    weights_shape = [trials_shape[0]] + weights_shape
    broadcast_pattern = [True if ax not in (weight_axes) else False 
                         for ax in xrange(1, len(trials_shape))]
    broadcast_pattern = [False] + broadcast_pattern
    weights = theano.shared((np.ones(weights_shape)).astype(np.float32),
                          broadcastable=broadcast_pattern)
    full_mask = full_mask * (T.maximum(weights,0) / 
        T.mean(T.maximum(weights,0), axis=0, keepdims=True))
    
    trials_var = T.ftensor4()
    scaled_trials = trials_var * full_mask
    targets = T.ivector()

    outputs = lasagne.layers.get_output(final_layer, inputs=scaled_trials, input_var=scaled_trials)

    loss = categorical_crossentropy(outputs, targets).sum()
    loss += T.mean(T.sqr(stds)) * 0.1
    loss -= T.mean(T.abs_(weights - T.mean(weights, axis=0, keepdims=True))) * 10
    adam_updates = adam(loss,[means, stds, weights], learning_rate=0.01)
    adam_grad_fn = theano.function([trials_var, targets], 
                                   [loss,outputs, scaled_trials, full_mask, weights], 
                                   updates=adam_updates)
    return adam_grad_fn
Example #44
def test_categorical_crossentropy():
    # symbolic version
    from lasagne.objectives import categorical_crossentropy
    p, t = theano.tensor.matrices('p', 't')
    c = categorical_crossentropy(p, t)
    # numeric version
    floatX = theano.config.floatX
    predictions = np.random.rand(10, 20).astype(floatX)
    predictions /= predictions.sum(axis=1, keepdims=True)
    targets = np.random.rand(10, 20).astype(floatX)
    targets /= targets.sum(axis=1, keepdims=True)
    crossent = -(targets * np.log(predictions)).sum(axis=-1)
    # compare
    assert np.allclose(crossent, c.eval({p: predictions, t: targets}))
Example #45
def test_categorical_crossentropy_onehot():
    # symbolic version
    from lasagne.objectives import categorical_crossentropy
    p = theano.tensor.matrix('p')
    t = theano.tensor.ivector('t')  # correct class per item
    c = categorical_crossentropy(p, t)
    # numeric version
    floatX = theano.config.floatX
    predictions = np.random.rand(10, 20).astype(floatX)
    predictions /= predictions.sum(axis=1, keepdims=True)
    targets = np.random.randint(20, size=10).astype(np.uint8)
    crossent = -np.log(predictions[np.arange(10), targets])
    # compare
    assert np.allclose(crossent, c.eval({p: predictions, t: targets}))
Example #46
def build_loss(targets, prediction, optimization):
	""" setup loss function with weight decay regularization """

	if optimization["objective"] == 'categorical':
		loss = objectives.categorical_crossentropy(prediction, targets)

	elif optimization["objective"] == 'binary':
		prediction = T.clip(prediction, 1e-7, 1-1e-7)
		loss = -(targets*T.log(prediction) + (1.0-targets)*T.log(1.0-prediction))
		# loss = objectives.binary_crossentropy(prediction[:,loss_index], targets[:,loss_index])

	elif (optimization["objective"] == 'squared_error'):
		loss = objectives.squared_error(prediction, targets)

	loss = objectives.aggregate(loss, mode='mean')

	return loss
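
A brief usage sketch for build_loss (the optimization dict key and the surrounding symbolic variables are assumptions consistent with the function above):

# Hypothetical: mean categorical cross-entropy for a softmax output layer.
prediction = layers.get_output(network)
optimization = {"objective": 'categorical'}
loss = build_loss(target_var, prediction, optimization)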
Example #47
    def get_cost_updates(self, corrupted_input, learning_rate):
        """ This function computes the cost and the updates for one training
        step of the dA """

        tilde_x = corrupted_input
        y = self.get_hidden_values(tilde_x)
        z = self.get_reconstructed_input(y)
        # note : we sum over the size of a datapoint; if we are using
        #        minibatches, L will be a vector, with one entry per
        #        example in minibatch
        # L = - T.sum(self.x * T.log(z) + (1 - self.x) * T.log(1 - z))
        L = categorical_crossentropy(z, self.x)

        # note : L is now a vector, where each element is the
        #        cross-entropy cost of the reconstruction of the
        #        corresponding example of the minibatch. We need to
        #        compute the average of all these to get the cost of
        #        the minibatch
        cost = T.mean(L)

        # small L2 penalty on the first parameter tensor
        reg = 1e-8 * lasagne.regularization.l2(self.params[0])
        cost = cost + reg

        # compute the gradients of the cost of the `dA` with respect
        # to its parameters (kept for inspection; sgd() recomputes them)
        gparams = T.grad(cost, self.params, add_names=True)

        # SGD step wrapped with classical momentum
        updates_sgd = sgd(cost, self.params, learning_rate)
        updates_dict = apply_momentum(updates_sgd, self.params, momentum=0.9)
        updates = updates_dict.items()

        # equivalent manual update rule:
        # updates = [(param, param - learning_rate * gparam)
        #            for param, gparam in zip(self.params, gparams)]

        return (cost, updates)
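# A standalone sketch of the update pattern used above: plain SGD wrapped with
# classical momentum via apply_momentum (toy cost, names hypothetical).
import numpy as np
import theano
import theano.tensor as T
from lasagne.updates import sgd, apply_momentum

x = T.vector('x')
w = theano.shared(np.zeros(5, dtype=theano.config.floatX), name='w')
cost = T.sum((x - w) ** 2)

updates_sgd = sgd(cost, [w], learning_rate=0.1)            # plain gradient step
updates = apply_momentum(updates_sgd, [w], momentum=0.9)   # add momentum term
step = theano.function([x], cost, updates=updates)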
Exemple #48
0
 def get_functions():
 
     input_layer=layers.InputLayer(shape=(BATCH_SIZE, INPUT_LENGTH))
     print "input_layer size: " + str(input_layer.shape[0])+","+ str(input_layer.shape[1])
     layer = input_layer
 
     for layer_num in range(len(NUM_UNITS_HIDDEN_LAYER)):
         print "layer_num-"+str(layer_num)
         layer=layers.DenseLayer(layer,
                                    num_units=NUM_UNITS_HIDDEN_LAYER[layer_num],
                                    W=lasagne.init.Normal(0.01),
                                    nonlinearity=nonlinearities.tanh)
 
 
     output_layer=layers.DenseLayer(layer,
                                    num_units=OUTPUT_SIZE,
                                    nonlinearity=nonlinearities.softmax)
 
 
     network_output=get_output(output_layer)
     expected_output=T.ivector()
 
 
     loss_train=aggregate(categorical_crossentropy(network_output, expected_output), mode='mean')
 
     all_weights=layers.get_all_params(output_layer)
 
     update_rule=lasagne.updates.nesterov_momentum(loss_train, all_weights, learning_rate=LEARNING_RATE)
     
     print "input_layer_end size: " + str(input_layer.shape[0])+","+ str(input_layer.shape[1])
     train_function=theano.function(inputs=[input_layer.input_var, expected_output],
                                    outputs=loss_train,
                                    updates=update_rule,
                                    allow_input_downcast=True)
 
     prediction = T.argmax(network_output, axis=1)
     accuracy = T.mean(T.eq(prediction, expected_output), dtype=theano.config.floatX)  # @UndefinedVariable
 
     test_function=theano.function(inputs=[input_layer.input_var, expected_output],
                                   outputs=[loss_train, accuracy, prediction],
                                   allow_input_downcast=True)
     
     output_function=theano.function([input_layer.input_var],get_output(output_layer),
                                   allow_input_downcast=True)
 
     return train_function,test_function,output_function
 def __build_loss_train__fn__(self):
     # create loss function
     prediction = layers.get_output(self.net)
     loss = objectives.categorical_crossentropy(prediction, self.__target_var__)
     loss = loss.mean() + 1e-4 * regularization.regularize_network_params(self.net, regularization.l2)
     
     val_acc = T.mean(T.eq(T.argmax(prediction, axis=1), self.__target_var__),dtype=theano.config.floatX)
     
     # create parameter update expressions
     params = layers.get_all_params(self.net, trainable=True)
     self.eta = theano.shared(sp.array(sp.float32(0.05), dtype=sp.float32))
     update_rule = updates.nesterov_momentum(loss, params, learning_rate=self.eta,
                                                 momentum=0.9)
     
     # compile training function that updates parameters and returns training loss
     self.__train_fn__ = theano.function([self.__input_var__,self.__target_var__], loss, updates=update_rule)
     self.__predict_fn__ = theano.function([self.__input_var__], layers.get_output(self.net,deterministic=True))
     self.__val_fn__ = theano.function([self.__input_var__,self.__target_var__], [loss,val_acc])
    def compile_val(self):

        if self.verbose: print('compiling validation function...')
        
        import theano
        
        from lasagne.layers import get_output
        
        output_val = lasagne.layers.get_output(self.output_layer, self.x, deterministic=True)
        
        from lasagne.objectives import categorical_accuracy, categorical_crossentropy
        
        cost = categorical_crossentropy(output_val, self.y).mean()
        error = 1-categorical_accuracy(output_val, self.y, top_k=1).mean()
        error_top_5 = 1-categorical_accuracy(output_val, self.y, top_k=5).mean()
        
        self.val_fn=  theano.function([self.subb_ind], [cost,error,error_top_5], updates=[], 
                                          givens=[(self.x, self.shared_x_slice),
                                                  (self.y, self.shared_y_slice)]
                                                                )
def grad_supervised(l_ram, labels):
    """
    return:
      loss = 1 / M * sum_{i=1..M} cross_entropy_loss(groundtruth, a_T)
      grads = theano.grad(loss, params)
    inputs:
      labels = (n_batch,)
          [theano tensor variable]
    """
    loc_mean_t, loc_t, h_t, prob, pred = lasagne.layers.get_output(l_ram)
    params = lasagne.layers.get_all_params(l_ram, trainable=True)

    ### loss estimation (cross entropy loss)
    loss = categorical_crossentropy(prob, labels)
    loss = aggregate(loss, mode='mean')

    ### gradient estimation
    grads = theano.grad(loss, params, disconnected_inputs='ignore')

    return loss, grads
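# Hedged sketch of consuming the (loss, grads) pair returned by grad_supervised:
# Lasagne update rules accept a precomputed gradient list in place of a loss
# expression. `input_var` is an illustrative placeholder for whatever feeds l_ram.
loss, grads = grad_supervised(l_ram, labels)
params = lasagne.layers.get_all_params(l_ram, trainable=True)
updates = lasagne.updates.adam(grads, params, learning_rate=1e-3)
train_fn = theano.function([input_var, labels], loss, updates=updates)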
def main():
    print "Building network ..."
    l_out = build_network(N_BATCH)
    read_model_data(l_out, 'lstm_iter_60000')
    print "Done building network"

    target_values = T.tensor3('target_output')
    input_values = T.tensor3('input')

    network_output = lasagne.layers.get_output(l_out, input_values)

    # categorical crossentropy loss because it's the proper way
    cost = T.mean(categorical_crossentropy(
        T.reshape(network_output, (N_BATCH * MAX_LENGTH, N_FEAT_DIM)),
        T.reshape(target_values, (N_BATCH * MAX_LENGTH, N_FEAT_DIM))))
    all_params = lasagne.layers.get_all_params(l_out)
    print "Computing updates..."
    updates = lasagne.updates.adagrad(cost, all_params, LEARNING_RATE)
    print "Compiling functions..."

    train = theano.function(
        [input_values, target_values], cost, updates=updates)
    compute_cost = theano.function([input_values, target_values], cost)
    train_f = open('chatlog.txt','r')
    f_data = train_f.read()
    print "Training ..."
    try:
        for n in xrange(N_ITERATIONS):
            X, Y = gen_data(f_data, n, N_BATCH, MAX_LENGTH)
            train(X, Y)
            if not n % CHECK_FREQUENCY:
                cost_val = compute_cost(X, Y)
                print "Iteration {} training cost = {}".format(n, cost_val)
            if n % CHECKPOINT_FREQUENCY == 0 and n > 0:
                print "Saving checkpoint..."
                fname = "lstm_iter_%d" % (n)
                write_model_data(l_out, fname)
        
    except KeyboardInterrupt:
        pass    
params = layers.get_all_params(unsupervised_graph, trainable=True) + \
         layers.get_all_params(supervised_graph, trainable=True)
# params = layers.get_all_params(supervised_graph)[-2:]
params = utils.unique(params)

# Get regularizable params
regularization_params = layers.get_all_params(unsupervised_graph, regularizable=True) + \
                        layers.get_all_params(supervised_graph, regularizable=True)
regularization_params = utils.unique(regularization_params)

# Creating loss functions
# Train loss has to take into account of labeled image or not
if run_parameters.unsupervised_cost_fun == 'squared_error':
    loss1 = objectives.squared_error(reconstruction, input_var)
elif run_parameters.unsupervised_cost_fun == 'categorical_crossentropy':
    loss1 = objectives.categorical_crossentropy(reconstruction, input_var)
if supervised_cost_fun == 'squared_error':
    loss2 = objectives.squared_error(prediction, target_var) * repeat_col(labeled_var, 10)
elif supervised_cost_fun == 'categorical_crossentropy':
    loss2 = objectives.categorical_crossentropy(prediction, target_var) * labeled_var.T
l2_penalties = regularization.apply_penalty(regularization_params, regularization.l2)
sparse_layers = get_all_sparse_layers(unsupervised_graph)
sparse_layers_output = layers.get_output(sparse_layers, deterministic=True)
if run_parameters.sparse_regularizer_type == 0:
    sparse_regularizer = reduce(lambda x, y: x + T.clip((T.mean(abs(y)) - run_parameters.sparse_regularize_factor) *
                                                        y.size, 0, float('inf')),
                                sparse_layers_output, 0)
elif run_parameters.sparse_regularizer_type == 1:
    sparse_regularizer = reduce(
        lambda x, y: x + T.clip(T.mean(abs(y), axis=1) - run_parameters.sparse_regularize_factor,
                                0, float('inf')).sum() * y.shape[1],
        sparse_layers_output, 0)
def categorical_test(
        build_cnn_fn, hyperpars, imgdat, runopts, networkstr,
        get_eventids_hits_and_targets_fn, get_list_of_hits_fn
):
    """
    Run tests on the reserved test sample ("training" examples with true
    values to check that were not used for learning or validation); read the
    data files in chunks into memory.

    `get_eventids_hits_and_targets_fn` needs to extract from a data slice
    a tuple of (eventids, [inputs], targets), where `[inputs]` might hold
    a single view or all three, etc.

    `get_list_of_hits_fn` needs to extract from a data slice a list of
    `[inputs]` that might hold a single view or all three, etc.
    """
    logger.info("Loading data for testing...")
    tstamp = get_tstamp_from_model_name(runopts['save_model_file'])
    train_sizes, valid_sizes, test_sizes = \
        get_and_print_dataset_subsizes(runopts['data_file_list'])
    used_sizes, used_data_size = get_used_data_sizes_for_testing(
        train_sizes, valid_sizes, test_sizes, runopts['test_all_data']
    )

    # Prepare Theano variables for inputs and targets
    inputlist = networkstr['input_list']
    target_var = T.ivector('targets')

    # Build the model
    network = build_cnn_fn(inputlist=inputlist,
                           imgw=imgdat['imgw'], imgh=imgdat['imgh'],
                           convpooldictlist=networkstr['topology'],
                           nhidden=networkstr['nhidden'],
                           dropoutp=networkstr['dropoutp'],
                           noutputs=networkstr['noutputs'],
                           depth=networkstr['img_depth']
    )
    with np.load(runopts['save_model_file']) as f:
        param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    lasagne.layers.set_all_param_values(network, param_values)

    # Create a loss expression for testing.
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    l2_penalty = lasagne.regularization.regularize_layer_params(
        lasagne.layers.get_all_layers(network),
        lasagne.regularization.l2) * networkstr['l2_penalty_scale']
    test_loss = categorical_crossentropy(test_prediction, target_var) + \
        l2_penalty
    test_loss = test_loss.mean()
    # Also create an expression for the classification accuracy:
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                      dtype=theano.config.floatX)
    # Look at the classifications
    test_prediction_values = T.argmax(test_prediction, axis=1)

    # Compute the actual predictions - also instructive is to look at
    # `test_prediction` as an output (array of softmax probabilities)
    pred_fn = theano.function(inputlist,
                              [test_prediction, test_prediction_values],
                              allow_input_downcast=True)
    # Compile a function computing the validation loss and accuracy:
    inputlist.append(target_var)
    val_fn = theano.function(inputlist, [test_loss, test_acc],
                             allow_input_downcast=True)

    logger.info("Starting testing...")
    # compute and print the test error and...
    test_err = 0
    test_acc = 0
    test_batches = 0
    # look at some concrete predictions
    num_poss_segs = networkstr['noutputs']
    pred_target = np.zeros(num_poss_segs, dtype='float32')
    true_target = np.zeros(num_poss_segs, dtype='float32')
    targs_mat = np.zeros(num_poss_segs * num_poss_segs,
                         dtype='float32').reshape(num_poss_segs, num_poss_segs)

    test_slices = []
    for tsize in used_sizes:
        test_slices.append(slices_maker(tsize, slice_size=50000))
    test_set = None

    verbose_evt_print_freq = 1
    evtcounter = 0
    for i, data_file in enumerate(runopts['data_file_list']):

        for tslice in test_slices[i]:
            t0 = time.time()
            test_set = None
            if runopts['test_all_data']:
                test_set = load_all_datasubsets(data_file, tslice)
            else:
                test_set = load_datasubset(data_file, 'test', tslice)
            _, test_dstream = make_scheme_and_stream(
                test_set, 1, shuffle=False
            )
            t1 = time.time()
            logger.info("  Loading slice {} from {} took {:.3f}s.".format(
                tslice, data_file, t1 - t0)
            )
            logger.debug(
                "   dset sources: {}".format(test_set.provides_sources)
            )

            t0 = time.time()
            for data in test_dstream.get_epoch_iterator():
                eventids, inputlist, targets = \
                    get_eventids_hits_and_targets_fn(data)
                inputlist.append(targets)
                err, acc = val_fn(*inputlist)
                test_err += err
                test_acc += acc
                test_batches += 1
                hits_list = get_list_of_hits_fn(data)
                probs, pred = pred_fn(*hits_list)
                pred_targ = zip(pred, targets)
                evtcounter += 1
                if runopts['be_verbose']:
                    if evtcounter % verbose_evt_print_freq == 0:
                        logger.info("{}/{} - {}: (prediction, true target): {}, {}".
                              format(evtcounter,
                                     used_data_size,
                                     eventids[0],
                                     pred_targ, probs))
                for p, t in pred_targ:
                    targs_mat[t][p] += 1
                    true_target[t] += 1
                    if p == t:
                        pred_target[p] += 1
            t1 = time.time()
            logger.info("  -Iterating over the slice took {:.3f}s.".format(t1 - t0))

            del test_set
            del test_dstream

    acc_target = 100.0 * pred_target / true_target.astype('float32')
    perf_file = 'perfmat' + tstamp + '.npy'
    np.save(perf_file, targs_mat)
    logger.info(
        "\nFinal results:"
        "\n  test loss:\t\t\t{:.6f}"
        "\n  test accuracy:\t\t{:.2f} %".format(
            test_err / test_batches, test_acc / test_batches * 100)
    )
    for i, v in enumerate(acc_target):
        logger.info("   target {} accuracy:\t\t\t{:.3f} %".format(
            i, acc_target[i]))
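# The nested loop above fills `targs_mat` as a confusion matrix (row = true
# class, column = predicted class). A short NumPy sketch (toy numbers) of
# recovering overall and per-class accuracy from such a matrix:
import numpy as np

targs_mat = np.array([[50.,  2.,  3.],
                      [ 4., 40.,  6.],
                      [ 1.,  5., 44.]], dtype='float32')

overall_acc = 100.0 * np.trace(targs_mat) / targs_mat.sum()
per_class_acc = 100.0 * np.diag(targs_mat) / targs_mat.sum(axis=1)
print("overall: %.2f%%, per class: %s" % (overall_acc, per_class_acc))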
def categorical_learn_and_validate(
        build_cnn_fn, hyperpars, imgdat, runopts, networkstr,
        get_list_of_hits_and_targets_fn
):
    """
    Run learning and validation for triamese networks using AdaGrad for
    learning rate evolution, nesterov momentum; read the data files in
    chunks into memory.

    `get_hits_and_targets` should extract a list `[inputs, targets]` from
    a data slice where `inputs` could be one item or 3 depending on the views
    studied (so total length is 2 or 4, most likely)
    """
    logger.info("Loading data...")
    train_sizes, valid_sizes, _ = \
        get_and_print_dataset_subsizes(runopts['data_file_list'])

    # Prepare Theano variables for inputs and targets
    target_var = T.ivector('targets')
    inputlist = networkstr['input_list']

    # Build the model
    network = build_cnn_fn(inputlist=inputlist,
                           imgw=imgdat['imgw'], imgh=imgdat['imgh'],
                           convpooldictlist=networkstr['topology'],
                           nhidden=networkstr['nhidden'],
                           dropoutp=networkstr['dropoutp'],
                           noutputs=networkstr['noutputs'],
                           depth=networkstr['img_depth']
    )
    logger.info(network_repr.get_network_str(
        lasagne.layers.get_all_layers(network),
        get_network=False, incomings=True, outgoings=True))
    if runopts['start_with_saved_params'] and \
       os.path.isfile(runopts['save_model_file']):
        logger.info(" Loading parameters file: %s" % \
                    runopts['save_model_file'])
        with np.load(runopts['save_model_file']) as f:
            param_values = [f['arr_%d' % i] for i in range(len(f.files))]
        lasagne.layers.set_all_param_values(network, param_values)
    else:
        # Dump the current network weights to file in case we want to study
        # initialization trends, etc.
        np.savez('./initial_parameters.npz',
                 *lasagne.layers.get_all_param_values(network))

    # Create a loss expression for training.
    prediction = lasagne.layers.get_output(network)
    l2_penalty = lasagne.regularization.regularize_layer_params(
        lasagne.layers.get_all_layers(network),
        lasagne.regularization.l2) * networkstr['l2_penalty_scale']
    loss = categorical_crossentropy(prediction, target_var) + l2_penalty
    loss = loss.mean()

    # Create update expressions for training.
    params = lasagne.layers.get_all_params(network, trainable=True)
    logger.info(
        """
        ////
        Use AdaGrad update schedule for learning rate, see Duchi, Hazan, and
        Singer (2011) "Adaptive subgradient methods for online learning and
        stochastic optimization." JMLR, 12:2121-2159
        ////
        """)
    updates_adagrad = lasagne.updates.adagrad(
        loss, params, learning_rate=hyperpars['learning_rate'], epsilon=1e-06)
    logger.info(
        """
        ////
        Apply Nesterov momentum using Lisa Lab's modifications.
        ////
        """)
    updates = lasagne.updates.apply_nesterov_momentum(
        updates_adagrad, params, momentum=hyperpars['momentum'])

    # Create a loss expression for validation/testing. Note we do a
    # deterministic forward pass through the network, disabling dropout.
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = categorical_crossentropy(test_prediction, target_var) + \
        l2_penalty
    test_loss = test_loss.mean()
    # Also create an expression for the classification accuracy:
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                      dtype=theano.config.floatX)

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    inputlist.append(target_var)
    train_fn = theano.function(inputlist, loss, updates=updates,
                               allow_input_downcast=True)
    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function(inputlist, [test_loss, test_acc],
                             allow_input_downcast=True)

    logger.info("Starting training...")
    #
    # TODO: early stopping logic goes here...
    #
    train_slices = []
    for tsize in train_sizes:
        train_slices.append(slices_maker(tsize, slice_size=50000))
    valid_slices = []
    for vsize in valid_sizes:
        valid_slices.append(slices_maker(vsize, slice_size=50000))
    train_set = None
    valid_set = None

    epoch = 0
    for epoch in range(hyperpars['num_epochs']):

        start_time = time.time()
        for slicelist in train_slices:
            shuffle(slicelist)
        logger.info("Train slices for epoch %d: %s" % (epoch, train_slices))

        train_err = 0
        train_batches = 0
        for i, data_file in enumerate(runopts['data_file_list']):
            # In each epoch, we do a full pass over the training data:
            for tslice in train_slices[i]:

                t0 = time.time()
                train_set = load_datasubset(data_file, 'train', tslice)
                _, train_dstream = make_scheme_and_stream(
                    train_set, hyperpars['batchsize']
                )
                t1 = time.time()
                logger.info(
                    "  Loading slice {} from {} took {:.3f}s.".format(
                        tslice, data_file, t1 - t0)
                )
                logger.debug(
                    "   dset sources: {}".format(train_set.provides_sources)
                )

                t0 = time.time()
                for data in train_dstream.get_epoch_iterator():
                    inputs = get_list_of_hits_and_targets_fn(data)
                    train_err += train_fn(*inputs)
                    train_batches += 1
                t1 = time.time()
                logger.info(
                    "  -Iterating over the slice took {:.3f}s.".format(t1 - t0)
                )

                del train_set       # hint to garbage collector
                del train_dstream   # hint to garbage collector

                # Dump the current network weights to file at end of slice
                np.savez(runopts['save_model_file'],
                         *lasagne.layers.get_all_param_values(network))

        if runopts['do_validation_pass']:
            # And a full pass over the validation data
            t0 = time.time()
            val_err = 0
            val_acc = 0
            val_batches = 0
            for i, data_file in enumerate(runopts['data_file_list']):
                for vslice in valid_slices[i]:
                    valid_set = load_datasubset(data_file, 'valid', vslice)
                    _, valid_dstream = make_scheme_and_stream(
                        valid_set, hyperpars['batchsize']
                    )

                    for data in valid_dstream.get_epoch_iterator():
                        inputs = get_list_of_hits_and_targets_fn(data)
                        err, acc = val_fn(*inputs)
                        val_err += err
                        val_acc += acc
                        val_batches += 1

                    del valid_set
                    del valid_dstream

            t1 = time.time()
            logger.info("  The validation pass took {:.3f}s.".format(t1 - t0))

        # Print the results for this epoch:
        logger.info(
            "\nEpoch {} of {} took {:.3f}s"
            "\n  training loss:\t\t{:.6f}".format(
                epoch + 1, hyperpars['num_epochs'], time.time() - start_time,
                train_err / train_batches
            )
        )
        if runopts['do_validation_pass']:
            logger.info(
                "\n  validation loss:\t\t{:.6f}"
                "\n  validation accuracy:\t\t{:.2f} %".format(
                    val_err / val_batches,
                    val_acc / val_batches * 100
                )
            )
            logger.info("---")

    logger.info("Finished {} epochs.".format(epoch + 1))
Exemple #56
0
def build_network():
    from lasagne.layers import InputLayer, LSTMLayer, ConcatLayer, ReshapeLayer, DenseLayer, get_output, get_all_params
    from lasagne.objectives import categorical_crossentropy
    print("Building network ...")

    # inputs ###############################################
    l_in_x = InputLayer(shape=(BATCH_SIZE, None, vocab_size))
    l_in_y = InputLayer(shape=(BATCH_SIZE, None, vocab_size))

    # encoder ###############################################
    l_enc = LSTMLayer(
        l_in_x, N_HIDDEN, grad_clipping=GRAD_CLIP,
        nonlinearity=lasagne.nonlinearities.tanh,
        only_return_final=True)
    
    # decoder ###############################################
    l_repeated_enc = Repeat(l_enc, SEQ_LENGTH)
    l_conc = ConcatLayer([l_in_y, l_repeated_enc], axis=2)

    l_dec = LSTMLayer(
        l_conc, N_HIDDEN, grad_clipping=GRAD_CLIP,
        nonlinearity=lasagne.nonlinearities.tanh)

    # output ###############################################
    l_dec_long = ReshapeLayer(l_dec, shape=(-1, N_HIDDEN))

    l_dist = DenseLayer(
        l_dec_long,
        num_units=vocab_size,
        nonlinearity=lasagne.nonlinearities.softmax)

    l_out = ReshapeLayer(l_dist, shape=(BATCH_SIZE, -1, vocab_size))

    # print(lasagne.layers.get_output_shape(l_out))

    # compilations ###############################################
    target_values = T.btensor3('target_output')
    network_output = get_output(l_out)
    cost = categorical_crossentropy(network_output, target_values).mean()

    all_params = get_all_params(l_out,trainable=True)
    print("Computing updates ...")
    updates = lasagne.updates.adagrad(cost, all_params, LEARNING_RATE)

    # Theano functions for training and computing cost
    print("Compiling functions ...")
    train = theano.function(
        inputs=[l_in_x.input_var, l_in_y.input_var, target_values],
        outputs=cost,
        updates=updates,
        allow_input_downcast=True)

    compute_cost = theano.function(
        inputs=[l_in_x.input_var, target_values],
        outputs=cost,
        allow_input_downcast=True)

    predict = theano.function(
        inputs=[l_in_x.input_var],
        outputs=network_output,
        allow_input_downcast=True)

    return train, predict, compute_cost
def multi_task_classifier(args, input_var, target_var, wordEmbeddings, seqlen, num_feats, lambda_val = 0.5 * 1e-4):

    print("Building multi task model with 1D Convolution")

    vocab_size = wordEmbeddings.shape[1]
    wordDim = wordEmbeddings.shape[0]

    kw = 2
    num_filters = seqlen-kw+1
    stride = 1 
    filter_size=wordDim
    pool_size=num_filters

    input = InputLayer((None, seqlen, num_feats),input_var=input_var)
    batchsize, _, _ = input.input_var.shape
    emb = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T)
    reshape = ReshapeLayer(emb, (batchsize, seqlen, num_feats*wordDim))


    conv1d_1 = DimshuffleLayer(Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()), (0,2,1))
    maxpool_1 = MaxPool1DLayer(conv1d_1, pool_size=pool_size)  
    hid_1 = DenseLayer(maxpool_1, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_1 = DenseLayer(hid_1, num_units=2, nonlinearity=softmax)


    conv1d_2 = DimshuffleLayer(Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()), (0,2,1))
    maxpool_2 = MaxPool1DLayer(conv1d_2, pool_size=pool_size)  
    hid_2 = DenseLayer(maxpool_2, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_2 = DenseLayer(hid_2, num_units=4, nonlinearity=softmax)

    conv1d_3 = DimshuffleLayer(Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()), (0,2,1))
    maxpool_3 = MaxPool1DLayer(conv1d_3, pool_size=pool_size)  
    hid_3 = DenseLayer(maxpool_3, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_3 = DenseLayer(hid_3, num_units=3, nonlinearity=softmax)

    conv1d_4 = DimshuffleLayer(Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()), (0,2,1))
    maxpool_4 = MaxPool1DLayer(conv1d_4, pool_size=pool_size)  
    hid_4 = DenseLayer(maxpool_4, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_4 = DenseLayer(hid_4, num_units=3, nonlinearity=softmax)

    conv1d_5 = DimshuffleLayer(Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()), (0,2,1))
    maxpool_5 = MaxPool1DLayer(conv1d_5, pool_size=pool_size)  
    hid_5 = DenseLayer(maxpool_5, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_5 = DenseLayer(hid_5, num_units=2, nonlinearity=softmax)

    conv1d_6 = DimshuffleLayer(Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()), (0,2,1))
    maxpool_6 = MaxPool1DLayer(conv1d_6, pool_size=pool_size)  
    hid_6 = DenseLayer(maxpool_6, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_6 = DenseLayer(hid_6, num_units=4, nonlinearity=softmax)


    conv1d_7 = DimshuffleLayer(Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()), (0,2,1))
    maxpool_7 = MaxPool1DLayer(conv1d_7, pool_size=pool_size)  
    hid_7 = DenseLayer(maxpool_7, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_7 = DenseLayer(hid_7, num_units=3, nonlinearity=softmax)

    conv1d_8 = DimshuffleLayer(Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()), (0,2,1))
    maxpool_8 = MaxPool1DLayer(conv1d_8, pool_size=pool_size)  
    hid_8 = DenseLayer(maxpool_8, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_8 = DenseLayer(hid_8, num_units=3, nonlinearity=softmax)


    # Is this important?
    network_1_out, network_2_out, network_3_out, network_4_out, \
    network_5_out, network_6_out, network_7_out, network_8_out = \
    get_output([network_1, network_2, network_3, network_4, network_5, network_6, network_7, network_8])

    loss_1 = T.mean(binary_crossentropy(network_1_out,target_var)) + regularize_layer_params_weighted({emb:lambda_val, conv1d_1:lambda_val, 
                hid_1:lambda_val, network_1:lambda_val} , l2)
    updates_1 = adagrad(loss_1, get_all_params(network_1, trainable=True), learning_rate=args.step)
    train_fn_1 = theano.function([input_var, target_var], loss_1, updates=updates_1, allow_input_downcast=True)
    val_acc_1 =  T.mean(binary_accuracy(get_output(network_1, deterministic=True), target_var))
    val_fn_1 = theano.function([input_var, target_var], val_acc_1, allow_input_downcast=True)


    loss_2 = T.mean(categorical_crossentropy(network_2_out,target_var)) + regularize_layer_params_weighted({emb:lambda_val, conv1d_2:lambda_val, 
                hid_2:lambda_val, network_2:lambda_val} , l2)
    updates_2 = adagrad(loss_2, get_all_params(network_2, trainable=True), learning_rate=args.step)
    train_fn_2 = theano.function([input_var, target_var], loss_2, updates=updates_2, allow_input_downcast=True)
    val_acc_2 =  T.mean(categorical_accuracy(get_output(network_2, deterministic=True), target_var))
    val_fn_2 = theano.function([input_var, target_var], val_acc_2, allow_input_downcast=True)


    loss_3 = T.mean(categorical_crossentropy(network_3_out,target_var)) + regularize_layer_params_weighted({emb:lambda_val, conv1d_3:lambda_val, 
                hid_3:lambda_val, network_3:lambda_val} , l2)
    updates_3 = adagrad(loss_3, get_all_params(network_3, trainable=True), learning_rate=args.step)
    train_fn_3 = theano.function([input_var, target_var], loss_3, updates=updates_3, allow_input_downcast=True)
    val_acc_3 =  T.mean(categorical_accuracy(get_output(network_3, deterministic=True), target_var))
    val_fn_3 = theano.function([input_var, target_var], val_acc_3, allow_input_downcast=True)


    loss_4 = T.mean(categorical_crossentropy(network_4_out,target_var)) + regularize_layer_params_weighted({emb:lambda_val, conv1d_4:lambda_val, 
                hid_4:lambda_val, network_4:lambda_val} , l2)
    updates_4 = adagrad(loss_4, get_all_params(network_4, trainable=True), learning_rate=args.step)
    train_fn_4 = theano.function([input_var, target_var], loss_4, updates=updates_4, allow_input_downcast=True)
    val_acc_4 =  T.mean(categorical_accuracy(get_output(network_4, deterministic=True), target_var))
    val_fn_4 = theano.function([input_var, target_var], val_acc_4, allow_input_downcast=True)

    loss_5 = T.mean(binary_crossentropy(network_5_out,target_var)) + regularize_layer_params_weighted({emb:lambda_val, conv1d_5:lambda_val, 
                hid_5:lambda_val, network_5:lambda_val} , l2)
    updates_5 = adagrad(loss_5, get_all_params(network_5, trainable=True), learning_rate=args.step)
    train_fn_5 = theano.function([input_var, target_var], loss_5, updates=updates_5, allow_input_downcast=True)
    val_acc_5 =  T.mean(binary_accuracy(get_output(network_5, deterministic=True), target_var))
    val_fn_5 = theano.function([input_var, target_var], val_acc_5, allow_input_downcast=True)

    loss_6 = T.mean(categorical_crossentropy(network_6_out,target_var)) + regularize_layer_params_weighted({emb:lambda_val, conv1d_6:lambda_val, 
                hid_6:lambda_val, network_6:lambda_val} , l2)
    updates_6 = adagrad(loss_6, get_all_params(network_6, trainable=True), learning_rate=args.step)
    train_fn_6 = theano.function([input_var, target_var], loss_6, updates=updates_6, allow_input_downcast=True)
    val_acc_6 =  T.mean(categorical_accuracy(get_output(network_6, deterministic=True), target_var))
    val_fn_6 = theano.function([input_var, target_var], val_acc_6, allow_input_downcast=True)

    loss_7 = T.mean(categorical_crossentropy(network_7_out,target_var)) + regularize_layer_params_weighted({emb:lambda_val, conv1d_7:lambda_val, 
                hid_7:lambda_val, network_7:lambda_val} , l2)
    updates_7 = adagrad(loss_7, get_all_params(network_7, trainable=True), learning_rate=args.step)
    train_fn_7 = theano.function([input_var, target_var], loss_7, updates=updates_7, allow_input_downcast=True)
    val_acc_7 =  T.mean(categorical_accuracy(get_output(network_7, deterministic=True), target_var))
    val_fn_7 = theano.function([input_var, target_var], val_acc_7, allow_input_downcast=True)

    loss_8 = T.mean(categorical_crossentropy(network_8_out,target_var)) + regularize_layer_params_weighted({emb:lambda_val, conv1d_8:lambda_val, 
                hid_8:lambda_val, network_8:lambda_val} , l2)
    updates_8 = adagrad(loss_8, get_all_params(network_8, trainable=True), learning_rate=args.step)
    train_fn_8 = theano.function([input_var, target_var], loss_8, updates=updates_8, allow_input_downcast=True)
    val_acc_8 =  T.mean(categorical_accuracy(get_output(network_8, deterministic=True), target_var))
    val_fn_8 = theano.function([input_var, target_var], val_acc_8, allow_input_downcast=True)


    return train_fn_1, val_fn_1, network_1, train_fn_2, val_fn_2, network_2, train_fn_3, val_fn_3, \
            network_3, train_fn_4, val_fn_4, network_4, train_fn_5, val_fn_5, network_5, \
            train_fn_6, val_fn_6, network_6, train_fn_7, val_fn_7, network_7, train_fn_8, val_fn_8, network_8
 def loss(x, t):
     return LO.aggregate(LO.categorical_crossentropy(T.clip(x, 1e-6, 1. - 1e-6), t))
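# The clip into [1e-6, 1 - 1e-6] above guards against log(0) when a softmax
# output saturates. A small NumPy demonstration (illustrative values):
import numpy as np

p = np.array([[1.0, 0.0]])   # fully saturated prediction
t = np.array([[0.0, 1.0]])   # the true class received zero probability

raw = -(t * np.log(p)).sum(axis=-1)                 # -> inf
clipped = np.clip(p, 1e-6, 1. - 1e-6)
safe = -(t * np.log(clipped)).sum(axis=-1)          # -> ~13.8, finite
print("unclipped: %s  clipped: %s" % (raw, safe))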
Exemple #59
0
    input_data = bows_count
    batch_size = 32
    hidden_units = [256, 128]

    input_var = T.fmatrix('inputs')
    target_var = T.ivector('targets')

    input_layer = InputLayer(shape=(batch_size, input_data.shape[1]), name='input_layer', input_var=input_var)
    hidden = [DenseLayer(input_layer, hidden_units[0])]
    for ne in hidden_units[1:]:
        hidden.append(DenseLayer(hidden[-1], ne))
    output_layer = DenseLayer(hidden[-1], len(unique_tags), nonlinearity=softmax)
    prediction = get_output(output_layer)

    loss = categorical_crossentropy(prediction, target_var)  # + 0.0001 * regularize_network_params(hidden[0], l1)
    loss = loss.mean()

    params = lasagne.layers.get_all_params(output_layer, trainable=True)
    updates = nesterov_momentum(loss, params, learning_rate=0.01, momentum=0.9)
    # updates = adam(loss, params, learning_rate=0.001)
    train_fn = theano.function([input_var, target_var], [loss, prediction], updates=updates)
    test_fn = theano.function([input_var, target_var], [loss, prediction])

    print_interval = 100
    test_interval = 1000
    test_size = 100
    iter_idx = 0
    epoch_idx = 0
    stats_accum_train = dict(loss=0.0, acc=0.0, count=0.0)
    while True: