def train_model(
    train_set_x_org=None,
    train_set_y_org=None,
    valid_set_x_org=None,
    valid_set_y_org=None,
    learning_rate=0.1,
    alpha=0.01,
    lambda1=0.001,
    lambda2=1.0,
    alpha1=0.001,
    alpha2=0.0,
    n_hidden=[256, 128, 16],
    n_epochs=1000,
    batch_size=100,
    activation_func="tanh",
    rng=numpy.random.RandomState(100),
):
    """
    Train a deep feature selection model. 
    
    INPUTS:
    train_set_x_org: numpy 2d array, each row is a training sample.
    
    train_set_y_org: numpy vector of type int {0,1,...,C-1}, class labels of training samples.
    
    valid_set_x_org: numpy 2d array, each row is a validation sample. 
    This set is to monitor the convergence of optimization.
    
    valid_set_y_org: numpy vector of type int {0,1,...,C-1}, class labels of validation samples.
        
    learning_rate: float scalar, the initial learning rate.
    
    alpha: float, parameter to trade off the momentum term.
    
    lambda1: float scalar, controls the sparsity of the input weights.
    The regularization term is lambda1*( (1-lambda2)/2 * ||w||_2^2 + lambda2 * ||w||_1 ).
    Thus, the larger lambda1 is, the sparser the input weights are.

    lambda2: float scalar in [0,1], controls the trade-off between the L1 (sparsity) and L2 (smoothness) penalties on the input weights.
    The regularization term is lambda1*( (1-lambda2)/2 * ||w||_2^2 + lambda2 * ||w||_1 ).
    Thus, the smaller lambda2 is, the smoother the input weights are.

    alpha1: float scalar, controls the sparsity of the weight matrices in the MLP.
    The regularization term is alpha1*( (1-alpha2)/2 * \sum_i ||W_i||_2^2 + alpha2 * \sum_i ||W_i||_1 ).
    Thus, the larger alpha1 is, the sparser the MLP weights are.

    alpha2: float scalar in [0,1], controls the trade-off between the L1 (sparsity) and L2 (smoothness) penalties on the MLP weight matrices.
    The regularization term is alpha1*( (1-alpha2)/2 * \sum_i ||W_i||_2^2 + alpha2 * \sum_i ||W_i||_1 ).
    Thus, the smaller alpha2 is, the smoother the MLP weights are.

    n_hidden: vector of int, n_hidden[i] is the number of hidden units of the i-th hidden layer.
    
    n_epochs: int scalar, the maximal number of epochs.
    
    batch_size: int scalar, minibatch size.
    
    activation_func: string, specify activation function. {"tanh" (default),"sigmoid"}
    
    rng: numpy random number state.
    
    OUTPUTS:
    classifier: object of DFS, the model learned, returned for testing.
    
    training_time: float, training time in seconds.
    """
    train_set_x = theano.shared(numpy.asarray(train_set_x_org, dtype=theano.config.floatX), borrow=True)
    train_set_y = T.cast(
        theano.shared(numpy.asarray(train_set_y_org, dtype=theano.config.floatX), borrow=True), "int32"
    )
    valid_set_x = theano.shared(numpy.asarray(valid_set_x_org, dtype=theano.config.floatX), borrow=True)
    valid_set_y = T.cast(
        theano.shared(numpy.asarray(valid_set_y_org, dtype=theano.config.floatX), borrow=True), "int32"
    )

    # compute number of minibatches for training, validation and testing
    n_train_batches = int(math.ceil(train_set_x.get_value(borrow=True).shape[0] / float(batch_size)))

    # shared variable to reduce the learning rate
    learning_rate_shared = theano.shared(learning_rate, name="learn_rate_shared")
    decay_rate = T.scalar(name="decay_rate", dtype=theano.config.floatX)
    reduce_learning_rate = theano.function(
        [decay_rate], learning_rate_shared, updates=[(learning_rate_shared, learning_rate_shared * decay_rate)]
    )

    ## define the model below
    num_feat = train_set_x.get_value(borrow=True).shape[1]  # number of features
    n_cl = len(numpy.unique(train_set_y_org))  # number of classes

    activations = {"tanh": T.tanh, "sigmoid": T.nnet.sigmoid}
    activation = activations[activation_func]

    # build an MLP object
    classifier = DFS(
        rng=rng,
        n_in=num_feat,
        n_hidden=n_hidden,
        n_out=n_cl,
        lambda1=lambda1,
        lambda2=lambda2,
        alpha1=alpha1,
        alpha2=alpha2,
        activation=activation,
    )

    train_model_one_iteration = classifier.build_train_function(
        train_set_x, train_set_y, batch_size, alpha, learning_rate_shared
    )
    validate_model = classifier.build_valid_function(valid_set_x, valid_set_y, batch_size)

    print "... training"
    # early-stopping parameters
    patience = 5000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is considered significant
    validation_frequency = min(n_train_batches, patience // 2)
    # go through this many minibatches before checking the network on the
    # validation set; in this case we check every epoch
    best_validation_loss = numpy.inf
    max_num_epoch_change_learning_rate = 100
    max_num_epoch_not_improve = 3 * max_num_epoch_change_learning_rate
    max_num_epoch_change_rate = 0.8
    learning_rate_decay_rate = 0.8
    epoch_change_count = 0
    start_time = time.clock()

    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        epoch_change_count = epoch_change_count + 1
        if epoch_change_count % max_num_epoch_change_learning_rate == 0:
            reduce_learning_rate(learning_rate_decay_rate)
            max_num_epoch_change_learning_rate = cl.change_max_num_epoch_change_learning_rate(
                max_num_epoch_change_learning_rate, max_num_epoch_change_rate
            )
            max_num_epoch_not_improve = 3 * max_num_epoch_change_learning_rate
            epoch_change_count = 0
        for minibatch_index in xrange(n_train_batches):

            minibatch_avg_cost = train_model_one_iteration(minibatch_index)
            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = validate_model()
                this_validation_loss = numpy.mean(validation_losses)

                print (
                    "epoch %i, minibatch %i/%i, validation error %f %%"
                    % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.0)
                )

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    num_epoch_not_improve = 0
                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    # save a copy of the currently best model parameter
                    best_model_params = classifier.get_params()

            if patience <= iter:
                done_looping = True
                break
        if this_validation_loss >= best_validation_loss:
            num_epoch_not_improve = num_epoch_not_improve + 1

        if num_epoch_not_improve >= max_num_epoch_not_improve:
            done_looping = True
            break
    # set the best model parameters
    classifier.set_params(best_model_params)
    end_time = time.clock()
    training_time = end_time - start_time
    print "Training time: %f" % (training_time / 60)
    print "Optimization complete with best validation score of %f," % (best_validation_loss * 100.0)
    return classifier, training_time
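
# A minimal, hypothetical usage sketch (added for illustration; not part of the
# original example). It assumes this module's imports (numpy, theano, DFS, cl) are
# available and uses random placeholder data only to show the expected shapes.
if __name__ == "__main__":
    demo_rng = numpy.random.RandomState(0)
    X_train = demo_rng.rand(500, 1000)             # 500 training samples, 1000 features
    y_train = demo_rng.randint(0, 2, size=500)     # binary class labels in {0,1}
    X_valid = demo_rng.rand(100, 1000)
    y_valid = demo_rng.randint(0, 2, size=100)
    classifier, training_time = train_model(
        train_set_x_org=X_train, train_set_y_org=y_train,
        valid_set_x_org=X_valid, valid_set_y_org=y_valid,
        learning_rate=0.1, lambda1=0.001, lambda2=1.0,
        n_hidden=[256, 128, 16], n_epochs=10, batch_size=100)
    print "training time (seconds): %f" % training_time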
Example #3
def finetune_model(classifier=None,
                   train_set_x=None,
                   train_set_y=None,
                   valid_set_x=None,
                   valid_set_y=None,
                   learning_rate=0.1,
                   alpha=0.01,
                   n_hidden=[256, 128, 16],
                   n_cl=2,
                   n_epochs=1000,
                   batch_size=100,
                   rng=numpy.random.RandomState(100)):
    """
    Finetune the model by training and validation sets.
    """

    # compute number of minibatches for training, validation and testing
    n_train_batches = int(
        math.ceil(train_set_x.get_value(borrow=True).shape[0] / float(batch_size)))
    #n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size

    # shared variable to reduce the learning rate
    learning_rate_shared = theano.shared(learning_rate,
                                         name='learn_rate_shared')
    decay_rate = T.scalar(name='decay_rate', dtype=theano.config.floatX)
    reduce_learning_rate = theano.function(
        [decay_rate],
        learning_rate_shared,
        updates=[(learning_rate_shared, learning_rate_shared * decay_rate)])

    train_model_one_iteration, validate_model = classifier.build_finetune_functions(
        train_set_x, train_set_y, valid_set_x, valid_set_y, batch_size,
        learning_rate_shared)
    print '... finetuning'
    # early-stopping parameters
    patience = 5000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
    # found
    improvement_threshold = 0.995  # a relative improvement of this much is
    # considered significant
    validation_frequency = min(n_train_batches, patience // 2)
    # go through this many
    # minibatches before checking the network
    # on the validation set; in this case we
    # check every epoch

    best_validation_loss = numpy.inf
    max_num_epoch_change_learning_rate = 100
    max_num_epoch_not_improve = 3 * max_num_epoch_change_learning_rate
    max_num_epoch_change_rate = 0.8
    learning_rate_decay_rate = 0.8
    epoch_change_count = 0
    start_time = time.clock()

    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        epoch_change_count = epoch_change_count + 1
        if epoch_change_count % max_num_epoch_change_learning_rate == 0:
            reduce_learning_rate(learning_rate_decay_rate)
            max_num_epoch_change_learning_rate= \
            cl.change_max_num_epoch_change_learning_rate(max_num_epoch_change_learning_rate,max_num_epoch_change_rate)
            max_num_epoch_not_improve = 3 * max_num_epoch_change_learning_rate
            epoch_change_count = 0
        for minibatch_index in xrange(n_train_batches):

            minibatch_avg_cost = train_model_one_iteration(minibatch_index)
            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = validate_model()
                this_validation_loss = numpy.mean(validation_losses)

                print('epoch %i, minibatch %i/%i, validation error %f %%' % \
                    (epoch, minibatch_index + 1, n_train_batches,
                    this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    num_epoch_not_improve = 0
                if this_validation_loss < best_validation_loss:
                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    # save a copy of the currently best model parameter
                    best_model_params = classifier.get_params()

            if patience <= iter:
                done_looping = True
                break
        if this_validation_loss >= best_validation_loss:
            num_epoch_not_improve = num_epoch_not_improve + 1

        if num_epoch_not_improve >= max_num_epoch_not_improve:
            done_looping = True
            break
    # set the best model parameters
    classifier.set_params(best_model_params)
    end_time = time.clock()
    training_time = end_time - start_time
    print 'Training time: %f minutes' % (training_time / 60)
    print 'Optimization complete with best validation score of %f %%.' % (
        best_validation_loss * 100.)
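
# A hypothetical usage sketch (added for illustration; not part of the original
# example). Unlike train_model above, this routine expects an already-built
# classifier exposing build_finetune_functions/get_params/set_params and data
# sets already wrapped as Theano shared variables; all names below
# (pretrained, X_train, ...) are placeholders.
#     train_x = theano.shared(numpy.asarray(X_train, dtype=theano.config.floatX), borrow=True)
#     train_y = T.cast(theano.shared(numpy.asarray(y_train, dtype=theano.config.floatX), borrow=True), 'int32')
#     valid_x = theano.shared(numpy.asarray(X_valid, dtype=theano.config.floatX), borrow=True)
#     valid_y = T.cast(theano.shared(numpy.asarray(y_valid, dtype=theano.config.floatX), borrow=True), 'int32')
#     finetune_model(classifier=pretrained,
#                    train_set_x=train_x, train_set_y=train_y,
#                    valid_set_x=valid_x, valid_set_y=valid_y,
#                    learning_rate=0.1, n_epochs=1000, batch_size=100)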
Example #4
def train_model(learning_rate=0.1, n_epochs=1000,
                train_set_x_org=None,train_set_y_org=None,valid_set_x_org=None,valid_set_y_org=None,
                           batch_size=100):
    """
    Train the logistic regression model. 
    
    INPUTS:
    learning_rate: float scalar, the initial learning rate.
    
    n_epochs: int scalar, the maximal number of epochs.
    
    train_set_x_org: numpy 2d array, each row is a training sample.
    
    train_set_y_org: numpy vector of type int {0,1,...,C-1}, class labels of training samples.
    
    valid_set_x_org: numpy 2d array, each row is a validation sample. 
    This set is to monitor the convergence of optimization.
    
    valid_set_y_org: numpy vector of type int {0,1,...,C-1}, class labels of validation samples.
    
    batch_size: int scalar, minibatch size.
    
    OUTPUTS:
    classifier: object of logisticRegression, the model learned, returned for testing.
    
    training_time: float, training time in seconds. 
    """
    train_set_x = theano.shared(numpy.asarray(train_set_x_org,dtype=theano.config.floatX),borrow=True)
    train_set_y = T.cast(theano.shared(numpy.asarray(train_set_y_org,dtype=theano.config.floatX),borrow=True),'int32')    
    valid_set_x = theano.shared(numpy.asarray(valid_set_x_org,dtype=theano.config.floatX),borrow=True)
    valid_set_y = T.cast(theano.shared(numpy.asarray(valid_set_y_org,dtype=theano.config.floatX),borrow=True),'int32')    

    # compute number of minibatches for training, validation and testing
    #n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    #n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_train_batches = int(math.ceil(train_set_x.get_value(borrow=True).shape[0] / float(batch_size)))
    n_valid_batches = int(math.ceil(valid_set_x.get_value(borrow=True).shape[0] / float(batch_size)))
    
    # shared variable to reduce the learning rate
    learning_rate_shared=theano.shared(learning_rate,name='learn_rate_shared')
    #learning_rate_init=T.scalar(name='learning_rate_init',dtype=theano.config.floatX)
    #epoch_variable=T.iscalar(name='epoch_variable')
    decay_rate=T.scalar(name='decay_rate',dtype=theano.config.floatX)
    #compute_learn_rate=theano.function([learning_rate_init,epoch_variable,decay_rate],learning_rate_shared, \
    #updates=[(learning_rate_shared,learning_rate_init*decay_rate**(epoch_variable//100))]) # thenao does not support math.pow, instead use T.pow() or a**b
    reduce_learning_rate=theano.function([decay_rate],learning_rate_shared,updates=[(learning_rate_shared,learning_rate_shared*decay_rate)])    
   
   # define the model

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # each row is a sample
    y = T.ivector('y')  # the labels are presented as 1D vector of [int] labels

    num_feat=train_set_x.get_value(borrow=True).shape[1]
    #print train_set_y.get_value()
    n_cl=len(numpy.unique(train_set_y_org))
    classifier = LogisticRegression(input=x, n_in=num_feat, n_out=n_cl)

    # the cost we minimize during training is the negative log likelihood of
    # the model in symbolic format
    cost = classifier.negative_log_likelihood(y)

    # compiling a Theano function that computes the mistakes that are made by
    # the model on a minibatch
    validate_model = theano.function(inputs=[index],
            outputs=classifier.errors(y),
            givens={
                x: valid_set_x[index * batch_size:(index + 1) * batch_size],
                y: valid_set_y[index * batch_size:(index + 1) * batch_size]})

    validate_model2 = theano.function(inputs=[],
            outputs=classifier.errors(y),
            givens={
                x: valid_set_x,
                y: valid_set_y})
                
    validate_model3 = theano.function(inputs=[], 
                                      outputs=classifier.y_pred,
                                      givens={x:valid_set_x})         
                
    # compute the gradient of cost with respect to theta = (W,b)
    g_W = T.grad(cost=cost, wrt=classifier.W)
    g_b = T.grad(cost=cost, wrt=classifier.b)

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs.
    # use the shared learning rate so that reduce_learning_rate() actually takes effect
    updates = [(classifier.W, classifier.W - learning_rate_shared * g_W),
               (classifier.b, classifier.b - learning_rate_shared * g_b)]

    # compiling a Theano function `train_model_one_iteration` that returns the cost, but in
    # the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model_one_iteration = theano.function(inputs=[index],
            outputs=cost,
            updates=updates,
            givens={
                x: train_set_x[index * batch_size:(index + 1) * batch_size],
                y: train_set_y[index * batch_size:(index + 1) * batch_size]})

    # training the model below
    # early-stopping parameters
    patience = 5000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                                  # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                  # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_validation_loss = numpy.inf
    max_num_epoch_change_learning_rate=100 # initial maximal number of epochs before changing the learning rate
    max_num_epoch_not_improve=3*max_num_epoch_change_learning_rate # max number of epochs without improvement before terminating the optimization
    max_num_epoch_change_rate=0.8 # decay factor applied to the max number of epochs before changing the learning rate
    learning_rate_decay_rate=0.8
    epoch_change_count=0
    start_time = time.clock()
    
    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        epoch_change_count=epoch_change_count+1
        if epoch_change_count % max_num_epoch_change_learning_rate ==0:
            reduce_learning_rate(learning_rate_decay_rate) 
            max_num_epoch_change_learning_rate= \
            cl.change_max_num_epoch_change_learning_rate(max_num_epoch_change_learning_rate,max_num_epoch_change_rate)
            max_num_epoch_not_improve=3*max_num_epoch_change_learning_rate            
            epoch_change_count=0        
        for minibatch_index in xrange(n_train_batches):
           
            minibatch_avg_cost = train_model_one_iteration(minibatch_index)
            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i)
                                     for i in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)

                print('epoch %i, minibatch %i/%i, validation error %f %%' % \
                    (epoch, minibatch_index + 1, n_train_batches,
                    this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    num_epoch_not_improve=0
                if this_validation_loss < best_validation_loss:
                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    # save a copy of the currently best model parameter
                    best_model_params=classifier.get_params()

            if patience <= iter:
                done_looping = True
                break
        if this_validation_loss >= best_validation_loss:
            num_epoch_not_improve=num_epoch_not_improve+1
            
        if num_epoch_not_improve>=max_num_epoch_not_improve:
                done_looping = True
                break
    # set the best model parameters
    classifier.set_params(best_model_params)
    end_time = time.clock()
    training_time=end_time-start_time
    print 'Training time: %f minutes' %(training_time/60)
    print 'Optimization complete with best validation score of %f %%.' %(best_validation_loss * 100.)
    return classifier,training_time
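
# A minimal, hypothetical usage sketch (added for illustration; random placeholder
# data only, not part of the original example).
if __name__ == "__main__":
    demo_rng = numpy.random.RandomState(0)
    X_train = demo_rng.rand(500, 20)
    y_train = demo_rng.randint(0, 3, size=500)     # three classes {0,1,2}
    X_valid = demo_rng.rand(100, 20)
    y_valid = demo_rng.randint(0, 3, size=100)
    classifier, training_time = train_model(
        learning_rate=0.1, n_epochs=10,
        train_set_x_org=X_train, train_set_y_org=y_train,
        valid_set_x_org=X_valid, valid_set_y_org=y_valid,
        batch_size=100)
    print "training time (seconds): %f" % training_time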
Example #5
def train_model(rng=numpy.random.RandomState(100),
                train_set_x_org=None,
                n_hidden=100,
                learning_rate=0.1,
                training_epochs=100,
                batch_size=100,
                persistent_chain_k=15):
    """
    Train a RBM model given training data.
    
    INPUTS:
    rng: numpy random number state.
    
    train_set_x_org: numpy 2d array, each row is a training sample.
    
    n_hidden, int, number of hidden units.
    
    learning_rate: float scalar, the initial learning rate.
    
    training_epochs: int scalar, the maximal number of epochs.
    
    batch_size: int scalar, minibatch size.
    
    persistent_chain_k: length of persistent chain from the last sampling to new sampling.
    
    OUTPUTS:
    rbm: object of RBM. The model learned. 
    
    mean_hidden: numpy 2d array, each row is a reduced training sample.
    
    training_time: training time.
    """

    train_set_x = theano.shared(numpy.asarray(train_set_x_org,
                                              dtype=theano.config.floatX),
                                borrow=True)
    n_train_batches = int(
        math.ceil(train_set_x.get_value(borrow=True).shape[0] / batch_size))
    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    # shared variable to reduce the learning rate
    learning_rate_shared = theano.shared(learning_rate,
                                         name='learn_rate_shared')
    #    learning_rate_init=T.scalar(name='learning_rate_init',dtype=theano.config.floatX)
    #    epoch_variable=T.iscalar(name='epoch_variable')
    decay_rate = T.scalar(name='decay_rate', dtype=theano.config.floatX)
    #    compute_learn_rate=theano.function([learning_rate_init,epoch_variable,decay_rate],learning_rate_shared, \
    #    updates=[(learning_rate_shared,learning_rate_init*decay_rate**(epoch_variable//100))]) # thenao does not support math.pow, instead use T.pow() or a**b
    reduce_learning_rate = theano.function(
        [decay_rate],
        learning_rate_shared,
        updates=[(learning_rate_shared, learning_rate_shared * decay_rate)])

    n_visible = train_set_x_org.shape[1]  # number of input features
    theano_rng = RandomStreams(rng.randint(2**30))

    # initialize storage for the persistent chain (state = hidden
    # layer of chain)
    persistent_chain = theano.shared(numpy.zeros((batch_size, n_hidden),
                                                 dtype=theano.config.floatX),
                                     borrow=True)

    # construct the RBM class
    rbm = RBM(input=x,
              n_visible=n_visible,
              n_hidden=n_hidden,
              numpy_rng=rng,
              theano_rng=theano_rng)

    # get the cost and the gradient corresponding to one step of CD-15
    # use the shared learning rate so that reduce_learning_rate() actually takes effect
    cost, updates = rbm.get_cost_updates(lr=learning_rate_shared,
                                         persistent=persistent_chain,
                                         k=persistent_chain_k)
    # it is ok for a theano function to have no output
    # the purpose of train_rbm is solely to update the RBM parameters
    train_rbm_one_iteration = theano.function(
        [index],
        cost,
        updates=updates,
        givens={x: train_set_x[index * batch_size:(index + 1) * batch_size]},
        name='train_rbm')

    # optimization, gradient descent
    max_num_epoch_change_learning_rate = 100
    max_num_epoch_not_improve = 2 * max_num_epoch_change_learning_rate
    max_num_epoch_change_rate = 0.8
    epoch_change_count = 0
    best_cost = numpy.inf
    # train the model using training set
    start_time = time.clock()

    for epoch in xrange(training_epochs):
        c = []  # costs of all minibatches of this epoch
        epoch_change_count = epoch_change_count + 1
        if epoch_change_count % max_num_epoch_change_learning_rate == 0:
            reduce_learning_rate(0.5)
            max_num_epoch_change_learning_rate= \
            cl.change_max_num_epoch_change_learning_rate(max_num_epoch_change_learning_rate,max_num_epoch_change_rate)
            max_num_epoch_not_improve = 2 * max_num_epoch_change_learning_rate
            epoch_change_count = 0
        for batch_index in xrange(n_train_batches):
            c_batch = train_rbm_one_iteration(batch_index)
            c.append(c_batch)
        this_cost = numpy.mean(c)
        print 'Training epoch: %d, cost: %f' % (epoch, this_cost)
        if this_cost < best_cost:
            best_cost = this_cost
            num_epoch_not_improve = 0
        else:
            num_epoch_not_improve = num_epoch_not_improve + 1
        if num_epoch_not_improve >= max_num_epoch_not_improve:
            break
    end_time = time.clock()
    training_time = end_time - start_time
    print 'Training time: %f minutes' % (training_time / 60)

    # return the trained model and the reduced training set
    extracted = rbm.propup(train_set_x)
    get_extracted = theano.function([], extracted)
    pre_activation, mean_hidden = get_extracted()
    return rbm, mean_hidden, training_time
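
# A minimal, hypothetical usage sketch (added for illustration; random placeholder
# data only, not part of the original example).
if __name__ == "__main__":
    demo_rng = numpy.random.RandomState(0)
    X_train = demo_rng.rand(500, 784)   # values in [0,1], like normalized pixel intensities
    rbm, mean_hidden, training_time = train_model(
        rng=demo_rng, train_set_x_org=X_train,
        n_hidden=100, learning_rate=0.1,
        training_epochs=10, batch_size=100, persistent_chain_k=15)
    print "reduced training set shape:", mean_hidden.shape   # expected (500, 100)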
Example #6
def train_model(train_set_x_org=None, training_epochs=1000, batch_size=100,
                n_hidden=10,learning_rate=0.1,contraction_level=0.1,
                cost_measure="cross_entropy", rng=numpy.random.RandomState(100)):
    """
    Train a contractive autoencoder. 
    
    INPUTS:
    train_set_x_org: numpy 2d array, each row is a training sample.
    
    training_epochs: int scalar, the maximal number of epochs.
    
    batch_size: int scalar, minibatch size.
    
    n_hidden: int scalar, number of hidden units
    
    learning_rate: float scalar, the initial learning rate.
    
    contraction_level: float scalar, the weight of the contraction (Jacobian) penalty.
    
    cost_measure: string from {"cross_entropy", "euclidean"}, measure used to compute the reconstruction cost.
    
    rng: numpy random number state.
    
    OUTPUTS:
    ca: object of cA, the model learned, returned for testing.
    
    train_set_x_extracted: reduced training set.
    
    training_time: float, training time in seconds. 
    """
    train_set_x = theano.shared(numpy.asarray(train_set_x_org,dtype=theano.config.floatX),borrow=True)
    #train_set_y = T.cast(theano.shared(numpy.asarray(train_set_y_org,dtype=theano.config.floatX),borrow=True),'int32')
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size    
    #n_train_batches = int(math.ceil(train_set_x.get_value(borrow=True).shape[0] / batch_size))
    # shared variable to reduce the learning rate
    learning_rate_shared=theano.shared(learning_rate,name='learn_rate_shared')
#    learning_rate_init=T.scalar(name='learning_rate_init',dtype=theano.config.floatX)
#    epoch_variable=T.iscalar(name='epoch_variable')
    decay_rate=T.scalar(name='decay_rate',dtype=theano.config.floatX)
#    compute_learn_rate=theano.function([learning_rate_init,epoch_variable,decay_rate],learning_rate_shared, \
#    updates=[(learning_rate_shared,learning_rate_init*decay_rate**(epoch_variable//100))]) # thenao does not support math.pow, instead use T.pow() or a**b
    reduce_learning_rate=theano.function([decay_rate],learning_rate_shared,updates=[(learning_rate_shared,learning_rate_shared*decay_rate)])    
    
    n_visible=train_set_x_org.shape[1] # number of input features
    
    # define the model
    x=T.matrix(name='x',dtype=theano.config.floatX) # define a symbol for the input data (training, validation, or test data)
    ca=cA(numpy_rng=rng, input=x, n_visible=n_visible, n_hidden=n_hidden,n_batchsize=batch_size)
    # get the formula of the cost and updates    
    # use the shared learning rate so that reduce_learning_rate() actually takes effect
    cost,updates=ca.get_cost_updates(contraction_level=contraction_level, learning_rate=learning_rate_shared,
                                     cost_measure=cost_measure)
    index=T.lscalar() # symbol for the index
    # define a function to update the cost and model parameters using the formula above     
    train_ca_one_iteration=theano.function([index], [ca.L_rec, ca.L_jacob], updates=updates,
                                           givens={x:train_set_x[index*batch_size:(index+1)*batch_size]})
    
    max_num_epoch_change_learning_rate=100
    max_num_epoch_not_improve=3*max_num_epoch_change_learning_rate    
    max_num_epoch_change_rate=0.8
    learning_rate_decay_rate=0.8
    epoch_change_count=0
    best_cost=numpy.inf
    # train the model using training set
    start_time=time.clock()
    
    for epoch in xrange(training_epochs):
        c=[] # costs of all minibatches of this epoch
        epoch_change_count=epoch_change_count+1
        if epoch_change_count % max_num_epoch_change_learning_rate ==0:
            reduce_learning_rate(learning_rate_decay_rate)
            max_num_epoch_change_learning_rate= \
            cl.change_max_num_epoch_change_learning_rate(max_num_epoch_change_learning_rate,max_num_epoch_change_rate)
            max_num_epoch_not_improve=3*max_num_epoch_change_learning_rate            
            epoch_change_count=0
        for batch_index in xrange(n_train_batches):
            c_batch,j_batch=train_ca_one_iteration(batch_index)
            c.append(c_batch)
        this_cost=numpy.mean(c)
        print 'Training epoch: %d, cost: %f' % (epoch,this_cost)
        if this_cost<best_cost:
            best_cost=this_cost
            num_epoch_not_improve=0
        else:
            num_epoch_not_improve=num_epoch_not_improve+1
        if num_epoch_not_improve>=max_num_epoch_not_improve:
            break
    end_time=time.clock()
    training_time=end_time-start_time
    print 'Training time: %f minutes' %(training_time/60)
    
    # return the trained model and the reduced training set
    extracted=ca.get_hidden_values(train_set_x)
    get_extracted=theano.function([],extracted)
    train_set_x_extracted=get_extracted()
    return ca, train_set_x_extracted, training_time
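
# A minimal, hypothetical usage sketch (added for illustration; random placeholder
# data only, not part of the original example).
if __name__ == "__main__":
    demo_rng = numpy.random.RandomState(0)
    X_train = demo_rng.rand(500, 50)
    ca, train_set_x_extracted, training_time = train_model(
        train_set_x_org=X_train, training_epochs=10, batch_size=100,
        n_hidden=10, learning_rate=0.1, contraction_level=0.1,
        cost_measure="cross_entropy", rng=demo_rng)
    print "extracted training set shape:", train_set_x_extracted.shape   # expected (500, 10)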
Example #7
def finetune_model(classifier=None,
                   train_set_x=None,train_set_y=None,valid_set_x=None,valid_set_y=None,
                   n_row_each_sample=1,
                   learning_rate=0.1, alpha=0.1, n_epochs=1000, rng=numpy.random.RandomState(1000), 
                   nkerns=[4,4,4],batch_size=500,
                   receptive_fields=((1,8),(1,8),(1,8)),poolsizes=((1,8),(1,8),(1,4)),full_hidden_sub=[16],full_hidden_all=[16],
                   max_num_epoch_change_learning_rate=80,
                   max_num_epoch_change_rate=0.8,
                   learning_rate_decay_rate=0.8):
    """
    Finetune the model using training and validation data.
    """        
    n_train=train_set_x.get_value(borrow=True).shape[0]
    n_train_batches=n_train//batch_size
    #n_train_batches =  int(math.floor(train_set_x.get_value(borrow=True).shape[0] / batch_size))
    #n_valid_batches = int(math.ceil(valid_set_x.get_value(borrow=True).shape[0] / batch_size))
    
    # shared variable to reduce the learning rate
    learning_rate_shared=theano.shared(learning_rate,name='learn_rate_shared')
#    learning_rate_init=T.scalar(name='learning_rate_init',dtype=theano.config.floatX)
#    epoch_variable=T.iscalar(name='epoch_variable')
    decay_rate=T.scalar(name='decay_rate',dtype=theano.config.floatX)
#    compute_learn_rate=theano.function([learning_rate_init,epoch_variable,decay_rate],learning_rate_shared, \
#    updates=[(learning_rate_shared,learning_rate_init*decay_rate**(epoch_variable//100))]) # thenao does not support math.pow, instead use T.pow() or a**b
    reduce_learning_rate=theano.function([decay_rate],learning_rate_shared,updates=[(learning_rate_shared,learning_rate_shared*decay_rate)])    
    
    train_model_one_iteration=classifier.build_train_function(train_set_x, train_set_y, batch_size, alpha, learning_rate_shared)
    validate_model=classifier.build_valid_function(valid_set_x, valid_set_y, batch_size)
                                                              
    ###############
    # TRAIN MODEL #
    ###############
    print '... finetuning'
    # early-stopping parameters
    #max_num_epoch_change_learning_rate=100
    max_num_epoch_not_improve=5*max_num_epoch_change_learning_rate    
    #max_num_epoch_change_rate=0.8
    #learning_rate_decay_rate=0.8
    epoch_change_count=0
    
    patience = 1000000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = n_train_batches  # min(n_train_batches, patience / 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    start_time = time.clock()

    epoch = 0
    done_looping = False
    while (epoch < n_epochs) and (not done_looping): # for every epoch
        epoch = epoch + 1
        epoch_change_count=epoch_change_count+1
        if epoch_change_count % max_num_epoch_change_learning_rate ==0:
            reduce_learning_rate(learning_rate_decay_rate)
            max_num_epoch_change_learning_rate= \
            cl.change_max_num_epoch_change_learning_rate(max_num_epoch_change_learning_rate,max_num_epoch_change_rate)
            max_num_epoch_not_improve=3*max_num_epoch_change_learning_rate            
            epoch_change_count=0
        #compute_learn_rate(learning_rate,epoch,0.5)
        print 'The current learning rate is ', learning_rate_shared.get_value()
        for minibatch_index in xrange(n_train_batches): # for every minibatch

            iter = (epoch - 1) * n_train_batches + minibatch_index # number of total minibatchs so far

            #if iter % 100 == 0:
                #print 'training @ iter = ', iter
            
#            if minibatch_index==n_train_batches-1:
#                batch_size_current=n_train - minibatch_index*batch_size
#            else:
#                batch_size_current=batch_size
#            cost_ij = train_model_one_iteration(minibatch_index,batch_size_current)
            cost_ij = train_model_one_iteration(minibatch_index)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = validate_model()
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %0.4f %%' % \
                      (epoch, minibatch_index + 1, n_train_batches, \
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    num_epoch_not_improve=0
                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter
                    # save a copy of the currently best model parameter
                    best_model_params=classifier.get_params()                

            if patience <= iter:
                done_looping = True
                break
            
        if this_validation_loss >= best_validation_loss:
            num_epoch_not_improve=num_epoch_not_improve+1
            
        if num_epoch_not_improve>=max_num_epoch_not_improve:
                done_looping = True
                break
    # set the best model parameters
    classifier.set_params(best_model_params)
    end_time = time.clock()
    training_time=end_time-start_time
    print 'Finetuning time: %f minutes' %(training_time/60)
    print 'Optimization complete with best validation score of %f %%.' %(best_validation_loss * 100.)
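
# A hypothetical usage sketch (added for illustration; not part of the original
# example). It assumes a convolutional `classifier` built elsewhere in the same
# project (exposing build_train_function/build_valid_function/get_params/set_params)
# and data sets already stored as Theano shared variables; all names are placeholders.
#     finetune_model(classifier=cnn_classifier,
#                    train_set_x=train_x_shared, train_set_y=train_y_shared,
#                    valid_set_x=valid_x_shared, valid_set_y=valid_y_shared,
#                    n_row_each_sample=4, learning_rate=0.1, alpha=0.1,
#                    n_epochs=1000, batch_size=500, nkerns=[4, 4, 4],
#                    receptive_fields=((1, 8), (1, 8), (1, 8)),
#                    poolsizes=((1, 8), (1, 8), (1, 4)),
#                    full_hidden_sub=[16], full_hidden_all=[16])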
Example #9
def train_model_old(train_set_x_org=None, train_set_y_org=None, valid_set_x_org=None, valid_set_y_org=None, 
                learning_rate=0.1, alpha=0.01, L1_reg=0.00, L2_reg=0.0001, n_hidden=[256,128,16],
                n_epochs=1000, batch_size=100, rng=numpy.random.RandomState(100)):
    """
    Train the model by training and validation sets.
    """
    train_set_x = theano.shared(numpy.asarray(train_set_x_org,dtype=theano.config.floatX),borrow=True)
    train_set_y = T.cast(theano.shared(numpy.asarray(train_set_y_org,dtype=theano.config.floatX),borrow=True),'int32')    
    valid_set_x = theano.shared(numpy.asarray(valid_set_x_org,dtype=theano.config.floatX),borrow=True)
    valid_set_y = T.cast(theano.shared(numpy.asarray(valid_set_y_org,dtype=theano.config.floatX),borrow=True),'int32')    

    # compute number of minibatches for training, validation and testing
    n_train_batches = int(math.ceil(train_set_x.get_value(borrow=True).shape[0] / float(batch_size)))
    n_valid_batches = int(math.ceil(valid_set_x.get_value(borrow=True).shape[0] / float(batch_size)))

    # shared variable to reduce the learning rate
    learning_rate_shared=theano.shared(learning_rate,name='learn_rate_shared')
    decay_rate=T.scalar(name='decay_rate',dtype=theano.config.floatX)
    reduce_learning_rate=theano.function([decay_rate],learning_rate_shared,updates=[(learning_rate_shared,learning_rate_shared*decay_rate)])    
    
    # define the model below

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # matrix, each row is a sample
    y = T.ivector('y')  # vector, integers from {0,1,2,...,C-1}

    num_feat=train_set_x.get_value(borrow=True).shape[1] # number of features
    n_cl=len(numpy.unique(train_set_y_org)) # number of classes
    # build an MLP object
    classifier = MLP(rng=rng, x=x, y=y, n_in=num_feat, n_hidden=n_hidden, n_out=n_cl)
    
    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = classifier.negative_log_likelihood(y) \
         + L1_reg * classifier.L1 \
         + L2_reg * classifier.L2_sqr

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    validate_model = theano.function(inputs=[index],
            outputs=classifier.errors(y),
            givens={
                x: valid_set_x[index * batch_size:(index + 1) * batch_size],
                y: valid_set_y[index * batch_size:(index + 1) * batch_size]})

    # compute the gradient of cost with respect to theta (stored in params)
    # the resulting gradients will be stored in a list grads
    grads = []
    for param in classifier.params:
        grad = T.grad(cost, param)
        grads.append(grad)

    delta_before=[]
    for param_i in classifier.params:
        delta_before_i=theano.shared(value=numpy.zeros(param_i.get_value().shape, dtype=theano.config.floatX))
        delta_before.append(delta_before_i)
    
    updates = []
    # momentum update: each step is -learning_rate * gradient plus alpha times the previous step
    for param_i, grad_i, delta_before_i in zip(classifier.params, grads, delta_before):
        delta_i=-learning_rate_shared * grad_i  + alpha*delta_before_i
        updates.append((param_i, param_i + delta_i ))
        updates.append((delta_before_i,delta_i))
    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs
        
    # compiling a Theano function `train_model` that returns the cost, but
    # in the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model_one_iteration = theano.function(inputs=[index], outputs=cost,
            updates=updates,
            givens={
                x: train_set_x[index * batch_size:(index + 1) * batch_size],
                y: train_set_y[index * batch_size:(index + 1) * batch_size]})
    
    print '... training'
        # early-stopping parameters
    patience = 5000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                                  # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                  # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_validation_loss = numpy.inf
    max_num_epoch_change_learning_rate=100
    max_num_epoch_not_improve=100#3*max_num_epoch_change_learning_rate    
    max_num_epoch_change_rate=0.9
    epoch_change_count=0
    start_time = time.clock()
    
    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        epoch_change_count=epoch_change_count+1
        if epoch_change_count % max_num_epoch_change_learning_rate ==0:
            reduce_learning_rate(0.9)
            max_num_epoch_change_learning_rate= \
            cl.change_max_num_epoch_change_learning_rate(max_num_epoch_change_learning_rate,max_num_epoch_change_rate)
            max_num_epoch_not_improve=100#3*max_num_epoch_change_learning_rate            
            epoch_change_count=0        
        for minibatch_index in xrange(n_train_batches):
           
            minibatch_avg_cost = train_model_one_iteration(minibatch_index)
            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i)
                                     for i in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)

                print('epoch %i, minibatch %i/%i, validation error %f %%' % \
                    (epoch, minibatch_index + 1, n_train_batches,
                    this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    num_epoch_not_improve = 0
                    # improve patience if the loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    # save a copy of the currently best model parameters
                    best_model_params = classifier.get_params()

            if patience <= iter:
                done_looping = True
                break
        if this_validation_loss >= best_validation_loss:
            num_epoch_not_improve = num_epoch_not_improve + 1

        if num_epoch_not_improve >= max_num_epoch_not_improve:
            done_looping = True
            break
    # set the best model parameters
    classifier.set_params(best_model_params)
    end_time = time.clock()
    training_time = end_time - start_time
    print 'Training time (minutes): %f' % (training_time / 60)
    print 'Optimization complete with best validation score of %f %%' % (best_validation_loss * 100.)
    return classifier, training_time
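
# Minimal usage sketch (illustrative only, not part of the original source).
# It assumes the deep-feature-selection train_model defined above is the one in
# scope (e.g., imported from its own module) and uses random toy data, so the
# shapes and hyperparameters below are placeholders rather than recommended
# settings.
def _example_train_deep_feature_selection():
    rng = numpy.random.RandomState(0)
    X_train = rng.rand(500, 1000)                 # 500 samples, 1000 features
    y_train = rng.randint(0, 2, size=500)         # binary class labels {0,1}
    X_valid = rng.rand(100, 1000)
    y_valid = rng.randint(0, 2, size=100)
    classifier, training_time = train_model(
        train_set_x_org=X_train, train_set_y_org=y_train,
        valid_set_x_org=X_valid, valid_set_y_org=y_valid,
        learning_rate=0.1, alpha=0.01,
        lambda1=0.001, lambda2=1.0, alpha1=0.001, alpha2=0.0,
        n_hidden=[256, 128, 16], n_epochs=100, batch_size=100,
        activation_func="tanh", rng=rng)
    return classifier, training_time
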
def train_model(train_set_x_org=None, train_set_y_org=None,
                valid_set_x_org=None, valid_set_y_org=None,
                n_row_each_sample=1,
                learning_rate=0.1, alpha=0.1, n_epochs=1000,
                rng=numpy.random.RandomState(1000),
                nkerns=[4, 4, 4], batch_size=500,
                receptive_fields=((2, 8), (2, 8), (2, 8)),
                poolsizes=((1, 8), (1, 8), (1, 4)),
                full_hidden=16):
    """
    Train the model using training and validation data.
    
    INPUTS:
    train_set_x_org: numpy 2d array, each row is a training sample.
    
    train_set_y_org: numpy vector of type int {0,1,...,C-1}, class labels of training samples.
    
    valid_set_x_org: numpy 2d array, each row is a validation sample. 
    This set is to monitor the convergence of optimization.
    
    valid_set_y_org: numpy vector of type int {0,1,...,C-1}, class labels of validation samples.
    
    n_row_each_sample: int, the number of rows obtained when each vectorized sample is reshaped into a matrix.
    The vectorized sample is in the form [row_0, row_1, ..., row_{n_row_each_sample-1}].
    
    learning_rate: float, the initial learning rate.
    
    alpha: float, parameter to trade off the momentum term.
    
    n_epochs: int, maximal number of epochs allowed.
    
    rng: numpy random number state.
    
    nkerns: list, tuple, or vector, nkerns[i] is the number of feature maps in the i-th convolutional layer
    
    batch_size: int, minibatch size.
    
    receptive_fields: list or tuple of the same length as nkerns, 
    receptive_fields[i] is a list or tuple of length 2, the size of receptive field in the i-th convolutional layer. 
    receptive_fields[i]= (#rows of the receptive field, #columns of the receptive field).
    
    poolsizes: list or tuple of the same length as nkerns, the pooling (downsampling) size in each convolutional layer.
    poolsizes[i] = (#rows, #columns) of the pooling region in the i-th layer.
    
    full_hidden: int, the number of hidden units in the fully connected layer that follows the last convolutional layer.
    
    OUTPUTS:
    classifier: object of CNN class, the model trained.
    
    training_time: float, training time in seconds.
    """    
    train_set_x = theano.shared(numpy.asarray(train_set_x_org,dtype=theano.config.floatX),borrow=True)
    train_set_y = T.cast(theano.shared(numpy.asarray(train_set_y_org,dtype=theano.config.floatX),borrow=True),'int32')    
    valid_set_x = theano.shared(numpy.asarray(valid_set_x_org,dtype=theano.config.floatX),borrow=True)
    valid_set_y = T.cast(theano.shared(numpy.asarray(valid_set_y_org,dtype=theano.config.floatX),borrow=True),'int32')        
    
    n_train=train_set_x.get_value(borrow=True).shape[0]
    n_train_batches=n_train//batch_size
    #n_train_batches =  int(math.floor(train_set_x.get_value(borrow=True).shape[0] / batch_size))
    #n_valid_batches = int(math.ceil(valid_set_x.get_value(borrow=True).shape[0] / batch_size))
    
    # shared variable to reduce the learning rate
    learning_rate_shared=theano.shared(learning_rate,name='learn_rate_shared')
#    learning_rate_init=T.scalar(name='learning_rate_init',dtype=theano.config.floatX)
#    epoch_variable=T.iscalar(name='epoch_variable')
    decay_rate=T.scalar(name='decay_rate',dtype=theano.config.floatX)
#    compute_learn_rate=theano.function([learning_rate_init,epoch_variable,decay_rate],learning_rate_shared, \
#    updates=[(learning_rate_shared,learning_rate_init*decay_rate**(epoch_variable//100))]) # theano does not support math.pow, instead use T.pow() or a**b
    reduce_learning_rate=theano.function([decay_rate],learning_rate_shared,updates=[(learning_rate_shared,learning_rate_shared*decay_rate)])    
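    # Note: each call reduce_learning_rate(d) multiplies the shared learning rate
    # by d in place, so after m such calls the rate equals learning_rate * d**m.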

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'
    
    num_feat=train_set_x.get_value(borrow=True).shape[1]
    input_size_row=n_row_each_sample # how many rows for each sample
    input_size_col=num_feat//n_row_each_sample
    input_size=(input_size_row,input_size_col)
    n_out=len(numpy.unique(train_set_y_org)) # number of classes
    classifier=cnn(rng=rng, batch_size=batch_size, input_size=input_size,
                 nkerns=nkerns, receptive_fields=receptive_fields, poolsizes=poolsizes,
                full_hidden=full_hidden, n_out=n_out)
                
    train_model_one_iteration=classifier.build_train_function(train_set_x, train_set_y, batch_size, 
                                                              alpha, learning_rate_shared)
    validate_model=classifier.build_valid_function(valid_set_x, valid_set_y, batch_size)
                                                              
    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    # early-stopping parameters
    max_num_epoch_change_learning_rate=100
    max_num_epoch_not_improve=3*max_num_epoch_change_learning_rate    
    max_num_epoch_change_rate=0.8
    learning_rate_decay_rate=0.8
    epoch_change_count=0
    
    patience = 1000000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = n_train_batches  # min(n_train_batches, patience / 2)
                                   # go through this many minibatches before
                                   # checking the network on the validation
                                   # set; in this case we check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    start_time = time.clock()

    epoch = 0
    done_looping = False
    while (epoch < n_epochs) and (not done_looping): # for every epoch
        epoch = epoch + 1
        epoch_change_count=epoch_change_count+1
        if epoch_change_count % max_num_epoch_change_learning_rate ==0:
            reduce_learning_rate(learning_rate_decay_rate)
            max_num_epoch_change_learning_rate= \
            cl.change_max_num_epoch_change_learning_rate(max_num_epoch_change_learning_rate,max_num_epoch_change_rate)
            max_num_epoch_not_improve=3*max_num_epoch_change_learning_rate            
            epoch_change_count=0
        #compute_learn_rate(learning_rate,epoch,0.5)
        print 'The current learning rate is ', learning_rate_shared.get_value()
        for minibatch_index in xrange(n_train_batches): # for every minibatch

            iter = (epoch - 1) * n_train_batches + minibatch_index  # total number of minibatches processed so far

            if iter % 100 == 0:
                print 'training @ iter = ', iter
            
#            if minibatch_index==n_train_batches-1:
#                batch_size_current=n_train - minibatch_index*batch_size
#            else:
#                batch_size_current=batch_size
#            cost_ij = train_model_one_iteration(minibatch_index,batch_size_current)
            cost_ij = train_model_one_iteration(minibatch_index)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = validate_model()
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %0.4f %%' % \
                      (epoch, minibatch_index + 1, n_train_batches, \
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    num_epoch_not_improve=0
                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter
                    # save a copy of the currently best model parameter
                    best_model_params=classifier.get_params()                

            if patience <= iter:
                done_looping = True
                break
            
        if this_validation_loss >= best_validation_loss:
            num_epoch_not_improve = num_epoch_not_improve + 1

        if num_epoch_not_improve >= max_num_epoch_not_improve:
            done_looping = True
            break
    # set the best model parameters
    classifier.set_params(best_model_params)
    end_time = time.clock()
    training_time = end_time - start_time
    print 'Training time (minutes): %f' % (training_time / 60)
    print 'Optimization complete with best validation score of %f %%' % (best_validation_loss * 100.)
    return classifier, training_time
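
# Minimal usage sketch (illustrative only, not part of the original source).
# It assumes the CNN train_model defined directly above is the one in scope
# (e.g., imported from its own module), valid-mode convolutions, and
# non-overlapping pooling; the toy shapes below were chosen so that the three
# conv+pool stages with the default receptive_fields/poolsizes reduce each
# sample to 1x1 feature maps, and are placeholders rather than real data.
def _example_train_cnn():
    rng = numpy.random.RandomState(0)
    n_rows, n_cols = 4, 767                       # each sample is a 4 x 767 "image"
    X_train = rng.rand(500, n_rows * n_cols)      # 500 vectorized training samples
    y_train = rng.randint(0, 2, size=500)         # binary class labels {0,1}
    X_valid = rng.rand(200, n_rows * n_cols)
    y_valid = rng.randint(0, 2, size=200)
    classifier, training_time = train_model(
        train_set_x_org=X_train, train_set_y_org=y_train,
        valid_set_x_org=X_valid, valid_set_y_org=y_valid,
        n_row_each_sample=n_rows,
        learning_rate=0.1, alpha=0.1, n_epochs=10, rng=rng,
        nkerns=[4, 4, 4], batch_size=100,
        receptive_fields=((2, 8), (2, 8), (2, 8)),
        poolsizes=((1, 8), (1, 8), (1, 4)),
        full_hidden=16)
    return classifier, training_time
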
def train_model(train_set_x_org=None,
                training_epochs=1000,
                batch_size=100,
                n_hidden=10,
                learning_rate=0.1,
                corruption_level=0.1,
                W=None,
                bhid=None,
                bvis=None,
                cost_measure="cross_entropy",
                rng=numpy.random.RandomState(100)):
    """
    Train a denoising autoencoder. 
    
    INPUTS:
    train_set_x_org: numpy 2d array, each row is a training sample.
    
    training_epochs: int scalar, the maximal number of epochs.
    
    batch_size: int scalar, minibatch size.
    
    n_hidden: int scalar, number of hidden units
    
    learning_rate: float scalar, the initial learning rate.
    
    corruption_level: float from interval [0,1), corruption level.
    
    cost_measure: string from {"cross_entropy", "euclidean"}, measure used to compute the reconstruction cost.
    
    rng: numpy random number state.
    
    OUTPUTS:
    da: object of dA, the model learned, returned for testing.
    
    train_set_x_extracted: numpy 2d array, the hidden (reduced) representation of the training set.
    
    training_time: float, training time in seconds. 
    """

    train_set_x = theano.shared(numpy.asarray(train_set_x_org,
                                              dtype=theano.config.floatX),
                                borrow=True)
    #train_set_y = T.cast(theano.shared(numpy.asarray(train_set_y_org,dtype=theano.config.floatX),borrow=True),'int32')
    #n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_train_batches = int(
        math.ceil(train_set_x.get_value(borrow=True).shape[0] / float(batch_size)))
    # shared variable to reduce the learning rate
    learning_rate_shared = theano.shared(learning_rate,
                                         name='learn_rate_shared')
    #    learning_rate_init=T.scalar(name='learning_rate_init',dtype=theano.config.floatX)
    #    epoch_variable=T.iscalar(name='epoch_variable')
    decay_rate = T.scalar(name='decay_rate', dtype=theano.config.floatX)
    #    compute_learn_rate=theano.function([learning_rate_init,epoch_variable,decay_rate],learning_rate_shared, \
    #    updates=[(learning_rate_shared,learning_rate_init*decay_rate**(epoch_variable//100))]) # theano does not support math.pow, instead use T.pow() or a**b
    reduce_learning_rate = theano.function(
        [decay_rate],
        learning_rate_shared,
        updates=[(learning_rate_shared, learning_rate_shared * decay_rate)])

    n_visible = train_set_x_org.shape[1]  # number of input features
    theano_rng = RandomStreams(rng.randint(2**30))  # random symbol

    # define the model
    x = T.matrix(
        name='x', dtype=theano.config.floatX
    )  # define a symbol for the input data (training, validation, or test data)
    da = dA_finetuning(numpy_rng=rng,
                       theano_rng=theano_rng,
                       input=x,
                       n_visible=n_visible,
                       n_hidden=n_hidden,
                       W=W,
                       bhid=bhid,
                       bvis=bvis)
    # get the formula of the cost and updates
    cost, updates = da.get_cost_updates(
        corruption_level=corruption_level,
        learning_rate=learning_rate,
        cost_measure=cost_measure
    )  # cost_measure can be either "cross_entropy" or "euclidean"
    index = T.lscalar()  # symbol for the index
    # define a function to update the cost and model parameters using the formula above
    train_da_one_iteration = theano.function(
        [index],
        cost,
        updates=updates,
        givens={x: train_set_x[index * batch_size:(index + 1) * batch_size]})

    max_num_epoch_change_learning_rate = 100
    max_num_epoch_not_improve = 3 * max_num_epoch_change_learning_rate
    max_num_epoch_change_rate = 0.8
    learning_rate_decay_rate = 0.8
    epoch_change_count = 0
    best_cost = numpy.inf
    # train the model using training set
    start_time = time.clock()

    for epoch in range(training_epochs):
        c = []  # costs of all minibatches of this epoch
        epoch_change_count = epoch_change_count + 1
        if epoch_change_count % max_num_epoch_change_learning_rate == 0:
            reduce_learning_rate(learning_rate_decay_rate)
            max_num_epoch_change_learning_rate= \
            cl.change_max_num_epoch_change_learning_rate(max_num_epoch_change_learning_rate,max_num_epoch_change_rate)
            max_num_epoch_not_improve = 3 * max_num_epoch_change_learning_rate
            epoch_change_count = 0
        for batch_index in range(n_train_batches):
            c_batch = train_da_one_iteration(batch_index)
            #print ("function output=" + str(c_batch) + " for " + str(batch_index)) # c_batch is the cost
            c.append(c_batch)
        this_cost = numpy.mean(c)
        print('Training epoch: %d, cost: %f' % (epoch, this_cost))
        if this_cost < best_cost:
            best_cost = this_cost
            num_epoch_not_improve = 0
        if this_cost >= best_cost:
            num_epoch_not_improve = num_epoch_not_improve + 1
        if num_epoch_not_improve >= max_num_epoch_not_improve:
            break
    end_time = time.clock()
    training_time = end_time - start_time
    print('Training time (minutes): %f' % (training_time / 60))

    # return the trained model and the reduced training set
    extracted = da.get_hidden_values(train_set_x)
    get_extracted = theano.function([], extracted)
    train_set_x_extracted = get_extracted()
    return da, train_set_x_extracted, training_time
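
# Minimal usage sketch (illustrative only, not part of the original source).
# It assumes the denoising-autoencoder train_model defined directly above is the
# one in scope (e.g., imported from its own module); the toy data lie in [0,1]
# so that the cross-entropy reconstruction cost is well defined.
def _example_train_da():
    rng = numpy.random.RandomState(0)
    X_train = rng.rand(500, 100)                  # 500 samples, 100 features in [0,1]
    da, X_reduced, training_time = train_model(
        train_set_x_org=X_train, training_epochs=50, batch_size=100,
        n_hidden=10, learning_rate=0.1, corruption_level=0.1,
        cost_measure="cross_entropy", rng=rng)
    # X_reduced has shape (500, 10): the hidden representation of each sample
    return da, X_reduced
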
def train_model(rng=numpy.random.RandomState(100), train_set_x_org=None, n_hidden=100, 
                learning_rate=0.1, training_epochs=100, batch_size=100, persistent_chain_k=15):
    """
    Train a RBM model given training data.
    
    INPUTS:
    rng: numpy random number state.
    
    train_set_x_org: numpy 2d array, each row is a training sample.
    
    n_hidden: int, number of hidden units.
    
    learning_rate: float scalar, the initial learning rate.
    
    training_epochs: int scalar, the maximal number of epochs.
    
    batch_size: int scalar, minibatch size.
    
    persistent_chain_k: int, number of Gibbs sampling steps k performed between parameter updates (persistent contrastive divergence, PCD-k).
    
    OUTPUTS:
    rbm: object of RBM. The model learned. 
    
    mean_hidden: numpy 2d array, each row is a reduced training sample.
    
    training_time: float, training time in seconds.
    """
                    
    train_set_x = theano.shared(numpy.asarray(train_set_x_org,dtype=theano.config.floatX),borrow=True)
    n_train_batches = int(math.ceil(train_set_x.get_value(borrow=True).shape[0] / batch_size))
    # allocate symbolic variables for the data
    index = T.lscalar()    # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    # shared variable to reduce the learning rate
    learning_rate_shared=theano.shared(learning_rate,name='learn_rate_shared')
#    learning_rate_init=T.scalar(name='learning_rate_init',dtype=theano.config.floatX)
#    epoch_variable=T.iscalar(name='epoch_variable')
    decay_rate=T.scalar(name='decay_rate',dtype=theano.config.floatX)
#    compute_learn_rate=theano.function([learning_rate_init,epoch_variable,decay_rate],learning_rate_shared, \
#    updates=[(learning_rate_shared,learning_rate_init*decay_rate**(epoch_variable//100))]) # theano does not support math.pow, instead use T.pow() or a**b
    reduce_learning_rate=theano.function([decay_rate],learning_rate_shared,updates=[(learning_rate_shared,learning_rate_shared*decay_rate)])    
    
    n_visible=train_set_x_org.shape[1] # number of input features
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    # initialize storage for the persistent chain (state = hidden
    # layer of chain)
    persistent_chain = theano.shared(numpy.zeros((batch_size, n_hidden),
                                                 dtype=theano.config.floatX),
                                     borrow=True)

    # construct the RBM class
    rbm = RBM(input=x, n_visible=n_visible,
              n_hidden=n_hidden, numpy_rng=rng, theano_rng=theano_rng)

    # get the cost and the updates corresponding to one step of PCD-k (k = persistent_chain_k)
    cost, updates = rbm.get_cost_updates(lr=learning_rate, persistent=persistent_chain, k=persistent_chain_k)
    # it is ok for a theano function to have no output
    # the purpose of train_rbm is solely to update the RBM parameters
    train_rbm_one_iteration = theano.function([index], cost, updates=updates,
           givens={x: train_set_x[index * batch_size:(index + 1) * batch_size]},
           name='train_rbm')

    # optimization, gradient descent
    max_num_epoch_change_learning_rate=100
    max_num_epoch_not_improve=2*max_num_epoch_change_learning_rate    
    max_num_epoch_change_rate=0.8
    epoch_change_count=0
    best_cost=numpy.inf
    # train the model using training set
    start_time=time.clock()
    
    for epoch in xrange(training_epochs):
        c=[] # costs of all minibatches of this epoch
        epoch_change_count=epoch_change_count+1
        if epoch_change_count % max_num_epoch_change_learning_rate ==0:
            reduce_learning_rate(0.5)
            max_num_epoch_change_learning_rate= \
            cl.change_max_num_epoch_change_learning_rate(max_num_epoch_change_learning_rate,max_num_epoch_change_rate)
            max_num_epoch_not_improve=2*max_num_epoch_change_learning_rate            
            epoch_change_count=0
        for batch_index in xrange(n_train_batches):
            c_batch=train_rbm_one_iteration(batch_index)
            c.append(c_batch)
        this_cost=numpy.mean(c)
        print 'Training epoch: %d, cost: %f' % (epoch, this_cost)
        if this_cost<best_cost:
            best_cost=this_cost
            num_epoch_not_improve=0
        if this_cost>=best_cost:
            num_epoch_not_improve=num_epoch_not_improve+1
        if num_epoch_not_improve>=max_num_epoch_not_improve:
            break
    end_time=time.clock()
    training_time=end_time-start_time
    print 'Training time (minutes): %f' % (training_time / 60)
    
    # return the trained model and the reduced training set
    extracted=rbm.propup(train_set_x)
    get_extracted=theano.function([],extracted)
    pre_activation,mean_hidden=get_extracted()
    return rbm, mean_hidden, training_time
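
# Minimal usage sketch (illustrative only, not part of the original source).
# It assumes the RBM train_model defined directly above is the one in scope
# (e.g., imported from its own module) and uses random binary toy data; the
# number of training samples is a multiple of batch_size so that the persistent
# chain always sees full minibatches.
def _example_train_rbm():
    rng = numpy.random.RandomState(0)
    X_train = (rng.rand(500, 100) > 0.5).astype('float32')   # 500 binary samples, 100 visible units
    rbm, mean_hidden, training_time = train_model(
        rng=rng, train_set_x_org=X_train, n_hidden=50,
        learning_rate=0.1, training_epochs=20, batch_size=100,
        persistent_chain_k=15)
    # mean_hidden has shape (500, 50): hidden-unit activation probabilities per sample
    return rbm, mean_hidden
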
def train_model(train_set_x_org=None,
                train_set_y_org=None,
                valid_set_x_org=None,
                valid_set_y_org=None,
                n_row_each_sample=1,
                learning_rate=0.1,
                alpha=0.1,
                n_epochs=1000,
                rng=numpy.random.RandomState(1000),
                nkerns=[4, 4, 4],
                batch_size=500,
                receptive_fields=((2, 8), (2, 8), (2, 8)),
                poolsizes=((1, 8), (1, 8), (1, 4)),
                full_hidden=16):
    """
    Train the model using training and validation data.
    
    INPUTS:
    train_set_x_org: numpy 2d array, each row is a training sample.
    
    train_set_y_org: numpy vector of type int {0,1,...,C-1}, class labels of training samples.
    
    valid_set_x_org: numpy 2d array, each row is a validation sample. 
    This set is to monitor the convergence of optimization.
    
    valid_set_y_org: numpy vector of type int {0,1,...,C-1}, class labels of validation samples.
    
    n_row_each_sample: int, the number of rows obtained when each vectorized sample is reshaped into a matrix.
    The vectorized sample is in the form [row_0, row_1, ..., row_{n_row_each_sample-1}].
    
    learning_rate: float, the initial learning rate.
    
    alpha: float, parameter to trade off the momentum term.
    
    n_epochs: int, maximal number of epochs allowed.
    
    rng: numpy random number state.
    
    nkerns: list, tuple, or vector, nkerns[i] is the number of feature maps in the i-th convolutional layer
    
    batch_size: int, minibatch size.
    
    receptive_fields: list or tuple of the same length as nkerns, 
    receptive_fields[i] is a list or tuple of length 2, the size of receptive field in the i-th convolutional layer. 
    receptive_fields[i]= (#rows of the receptive field, #columns of the receptive field).
    
    poolsizes: list or tuple of the same length as nkerns, the pooling (downsampling) size in each convolutional layer.
    poolsizes[i] = (#rows, #columns) of the pooling region in the i-th layer.
    
    full_hidden: int, the number of hidden units in the fully connected layer that follows the last convolutional layer.
    
    OUTPUTS:
    classifier: object of CNN class, the model trained.
    
    training_time: float, training time in seconds.
    """
    train_set_x = theano.shared(numpy.asarray(train_set_x_org,
                                              dtype=theano.config.floatX),
                                borrow=True)
    train_set_y = T.cast(
        theano.shared(numpy.asarray(train_set_y_org,
                                    dtype=theano.config.floatX),
                      borrow=True), 'int32')
    valid_set_x = theano.shared(numpy.asarray(valid_set_x_org,
                                              dtype=theano.config.floatX),
                                borrow=True)
    valid_set_y = T.cast(
        theano.shared(numpy.asarray(valid_set_y_org,
                                    dtype=theano.config.floatX),
                      borrow=True), 'int32')

    n_train = train_set_x.get_value(borrow=True).shape[0]
    n_train_batches = n_train // batch_size
    #n_train_batches =  int(math.floor(train_set_x.get_value(borrow=True).shape[0] / batch_size))
    #n_valid_batches = int(math.ceil(valid_set_x.get_value(borrow=True).shape[0] / batch_size))

    # shared variable to reduce the learning rate
    learning_rate_shared = theano.shared(learning_rate,
                                         name='learn_rate_shared')
    #    learning_rate_init=T.scalar(name='learning_rate_init',dtype=theano.config.floatX)
    #    epoch_variable=T.iscalar(name='epoch_variable')
    decay_rate = T.scalar(name='decay_rate', dtype=theano.config.floatX)
    #    compute_learn_rate=theano.function([learning_rate_init,epoch_variable,decay_rate],learning_rate_shared, \
    #    updates=[(learning_rate_shared,learning_rate_init*decay_rate**(epoch_variable//100))]) # theano does not support math.pow, instead use T.pow() or a**b
    reduce_learning_rate = theano.function(
        [decay_rate],
        learning_rate_shared,
        updates=[(learning_rate_shared, learning_rate_shared * decay_rate)])

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    num_feat = train_set_x.get_value(borrow=True).shape[1]
    input_size_row = n_row_each_sample  # how many rows for each sample
    input_size_col = num_feat // n_row_each_sample
    input_size = (input_size_row, input_size_col)
    n_out = len(numpy.unique(train_set_y_org))  # number of classes
    classifier = cnn(rng=rng,
                     batch_size=batch_size,
                     input_size=input_size,
                     nkerns=nkerns,
                     receptive_fields=receptive_fields,
                     poolsizes=poolsizes,
                     full_hidden=full_hidden,
                     n_out=n_out)

    train_model_one_iteration = classifier.build_train_function(
        train_set_x, train_set_y, batch_size, alpha, learning_rate_shared)
    validate_model = classifier.build_valid_function(valid_set_x, valid_set_y,
                                                     batch_size)

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    # early-stopping parameters
    max_num_epoch_change_learning_rate = 100
    max_num_epoch_not_improve = 3 * max_num_epoch_change_learning_rate
    max_num_epoch_change_rate = 0.8
    learning_rate_decay_rate = 0.8
    epoch_change_count = 0

    patience = 1000000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
    # considered significant
    validation_frequency = n_train_batches  # min(n_train_batches, patience / 2)
    # go through this many minibatches before checking the network on the
    # validation set; in this case we check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    start_time = time.clock()

    epoch = 0
    done_looping = False
    while (epoch < n_epochs) and (not done_looping):  # for every epoch
        epoch = epoch + 1
        epoch_change_count = epoch_change_count + 1
        if epoch_change_count % max_num_epoch_change_learning_rate == 0:
            reduce_learning_rate(learning_rate_decay_rate)
            max_num_epoch_change_learning_rate= \
            cl.change_max_num_epoch_change_learning_rate(max_num_epoch_change_learning_rate,max_num_epoch_change_rate)
            max_num_epoch_not_improve = 3 * max_num_epoch_change_learning_rate
            epoch_change_count = 0
        #compute_learn_rate(learning_rate,epoch,0.5)
        print 'The current learning rate is ', learning_rate_shared.get_value()
        for minibatch_index in xrange(n_train_batches):  # for every minibatch

            iter = (
                epoch - 1
            ) * n_train_batches + minibatch_index  # total number of minibatches processed so far

            if iter % 100 == 0:
                print 'training @ iter = ', iter


#            if minibatch_index==n_train_batches-1:
#                batch_size_current=n_train - minibatch_index*batch_size
#            else:
#                batch_size_current=batch_size
#            cost_ij = train_model_one_iteration(minibatch_index,batch_size_current)
            cost_ij = train_model_one_iteration(minibatch_index)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = validate_model()
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %0.4f %%' % \
                      (epoch, minibatch_index + 1, n_train_batches, \
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    num_epoch_not_improve = 0
                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter
                    # save a copy of the currently best model parameter
                    best_model_params = classifier.get_params()

            if patience <= iter:
                done_looping = True
                break

        if this_validation_loss >= best_validation_loss:
            num_epoch_not_improve = num_epoch_not_improve + 1

        if num_epoch_not_improve >= max_num_epoch_not_improve:
            done_looping = True
            break
    # set the best model parameters
    classifier.set_params(best_model_params)
    end_time = time.clock()
    training_time = end_time - start_time
    print 'Training time (minutes): %f' % (training_time / 60)
    print 'Optimization complete with best validation score of %f %%' % (
        best_validation_loss * 100.)
    return classifier, training_time