Example #1
# imports assumed by this example; MLP and load_dataset are taken to come from
# the accompanying mlp / load_dataset2 modules used elsewhere in these examples
import os
import sys
import time

import numpy
import theano
import theano.tensor as T

from mlp import MLP
from load_dataset2 import load_dataset
def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
             dataset='../data/mnist.pkl.gz', batch_size=1, n_hidden=15):
    """
    Demonstrate stochastic gradient descent optimization for a multilayer
    perceptron

    This is demonstrated on MNIST.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient)

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see
    regularization)

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see
    regularization)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: the path of the MNIST dataset file from
                 http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz


   """
    datasets = load_dataset('bio', reduce=3, samples=None, frac=1.0, rng_seed=1234)
    #datasets = load_dataset('mri',reduce=3,samples=None, frac=1.0, rng_seed=1234)
    #datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    rng = numpy.random.RandomState(1234)

    # construct the MLP class
    classifier = MLP(rng=rng, input=x, n_in=1776, #n_in=184, # n_in=28 * 28,
                     n_hidden=n_hidden, n_out=2) # n_out=10)

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = classifier.negative_log_likelihood(y) \
         + L1_reg * classifier.L1 \
         + L2_reg * classifier.L2_sqr
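    # i.e. cost = NLL(y) + L1_reg * sum(|W|) + L2_reg * sum(W ** 2), where the
    # sums run over the network's weight matrices (see the MLP class)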

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(inputs=[index],
            outputs=classifier.errors(y),
            givens={
                x: test_set_x[index * batch_size:(index + 1) * batch_size],
                y: test_set_y[index * batch_size:(index + 1) * batch_size]})

    validate_model = theano.function(inputs=[index],
            outputs=classifier.errors(y),
            givens={
                x: valid_set_x[index * batch_size:(index + 1) * batch_size],
                y: valid_set_y[index * batch_size:(index + 1) * batch_size]})

    # compute the gradient of cost with respect to theta (stored in params)
    # the resulting gradients will be stored in a list gparams
    gparams = []
    for param in classifier.params:
        gparam = T.grad(cost, param)
        gparams.append(gparam)

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs
    updates = []
    # given two lists A = [a1, a2, a3, a4] and B = [b1, b2, b3, b4] of the
    # same length, zip generates a list C of the same size, where each
    # element is a pair formed from the two lists:
    #    C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]
    for param, gparam in zip(classifier.params, gparams):
        updates.append((param, param - learning_rate * gparam))

    # compiling a Theano function `train_model` that returns the cost, but
    # at the same time updates the parameters of the model based on the
    # rules defined in `updates`
    train_model = theano.function(inputs=[index], outputs=cost,
            updates=updates,
            givens={
                x: train_set_x[index * batch_size:(index + 1) * batch_size],
                y: train_set_y[index * batch_size:(index + 1) * batch_size]})

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'

    # early-stopping parameters
    patience = 10000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch
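    # Worked example (illustrative numbers, not from the original code): with
    # patience = 10000 and, say, 2000 training minibatches per epoch,
    # validation_frequency = min(2000, 5000) = 2000, so the network is checked
    # once per epoch; each time the validation loss drops below 0.995 * best,
    # patience is extended to at least 2 * iter, and the loop below stops as
    # soon as iter reaches patience.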

    best_params = None
    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            minibatch_avg_cost = train_model(minibatch_index)
            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i
                                     in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)

                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                     (epoch, minibatch_index + 1, n_train_batches,
                      this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                           improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [test_model(i) for i
                                   in xrange(n_test_batches)]
                    test_score = numpy.mean(test_losses)

                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
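The update rule compiled above is plain minibatch stochastic gradient descent: each parameter theta is replaced by theta - learning_rate * d(cost)/d(theta). The following is a minimal NumPy-only sketch of that zip-based update loop (the sgd_step helper and toy shapes are hypothetical, and no Theano is involved), not the example's own code:

import numpy

def sgd_step(params, grads, learning_rate=0.01):
    # pair each parameter with its gradient, exactly like the
    # zip(classifier.params, gparams) loop above, and step downhill
    return [param - learning_rate * grad
            for param, grad in zip(params, grads)]

# toy usage with two fake parameter arrays and their gradients
params = [numpy.ones((3, 2)), numpy.zeros(2)]
grads = [0.1 * numpy.ones((3, 2)), 0.5 * numpy.ones(2)]
params = sgd_step(params, grads, learning_rate=0.01)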
Example #2
# module-level imports assumed by this example (theano itself is imported
# inside the function, after the GPU has been selected)
import os
import sys
import time

import numpy
def reuse_SdA6(finetune_lr=None, pretraining_epochs=None,
             pretrain_lr=None, training_epochs=None,
              n_ins=None,
              hidden_layers_sizes=None,
              dataset_A=None, 
              n_outs=None,
              retrain=None,
              source_reuse_mode=None,
              #reset_pt=None,
              dataset_B=None,
              n_outs_source=None,
              batch_size=None,
              output_fold = None, 
              rng_seed=None,
              retrain_ft_layers=None,
              sda_reuse_pt_model=None, 
              sda_reuse_ft_model=None, 
              repetition=None,
              tau=None,
              training_data_fraction=None,
              #dropout = None,
              dropout_rate = None):
    

    """
    Demonstrates how to train and test a stacked denoising autoencoder.

    This is demonstrated on MNIST.

    :type finetune_lr: float
    :param finetune_lr: learning rate used in the finetune stage
    (factor for the stochastic gradient)

    :type pretraining_epochs: int
    :param pretraining_epochs: number of epochs to do pretraining

    :type pretrain_lr: float
    :param pretrain_lr: learning rate to be used during pre-training

    :type training_epochs: int
    :param training_epochs: maximal number of epochs to run the optimizer

    :type dataset_A: string
    :param dataset_A: name of the dataset to load (passed to load_dataset)

    """
    
    # Import sandbox.cuda to bind the specified GPU to this subprocess
    # then import the remaining theano and model modules.
    import theano.sandbox.cuda
    theano.sandbox.cuda.use('gpu0')
    
    import theano
    import theano.tensor as T
    from theano.tensor.shared_randomstreams import RandomStreams
    from load_dataset2 import load_dataset, dropout_weights, combine_two_dataset
    from mlp import HiddenLayer
    from dA5 import dA5
    from SdA6 import SdA6
    from mlp5_train_model2 import train_test_mlp
    
    if source_reuse_mode != 'Join':
        # these datasets are loaded with a stronger reduction factor
        if dataset_A in ('mnist_64x80', 'bbbc', 'bbbc+feat', 'bbbc+feat2',
                         'bbbc+feat3', 'bbbc+moa', 'bbbc+comp', '20news_4'):
            reduce_factor = 5
        else:
            reduce_factor = 3
        datasets = load_dataset(dataset_A, reduce=reduce_factor, samples=None,
                                frac=training_data_fraction,
                                rng_seed=rng_seed + repetition)

        #datasets = load_dataset(dataset_A,reduce=3,samples=None, frac=training_data_fraction, rng_seed=rng_seed)
        #datasets = load_dataset(dataset_A)
        #datasets = load_dataset(dataset_A,reduce=1,samples=100)
    else:
        datasets = combine_two_dataset(dataset_A, dataset_B, frac=training_data_fraction,
                                       rng_seed=rng_seed + repetition)

    train_set_x, train_set_y = datasets[0]
    #print 'train_set_y', train_set_y
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]



    #datasets2 = load_dataset('csip_sv_ds_ar1',reduce=3,samples=None, frac=training_data_fraction, rng_seed=rng_seed+repetition)
    #datasets2 = load_dataset('csip_ds_ar1',reduce=3,samples=None, frac=training_data_fraction, rng_seed=rng_seed+repetition)
    #train_set_x2, train_set_y2 = datasets2[0]
    
    # compute number of minibatches for training, validation and testing
    #n_train_batches2 = train_set_x2.get_value(borrow=True).shape[0] / batch_size
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size
     
    #if approach == 'BL':
    if retrain == 0:    
        # numpy random generator
        #numpy_rng = numpy.random.RandomState(rng_seed);
        numpy_rng = numpy.random.RandomState(rng_seed+repetition)
    
        print '... building the model'
        # construct the stacked denoising autoencoder class
        sda = SdA6(numpy_rng=numpy_rng, n_ins=n_ins,
                  hidden_layers_sizes=hidden_layers_sizes,
                  n_outs=n_outs, #n_outs_b=n_outs_b,
                  tau=tau)
#         from SdA7 import SdA7
#         sda = SdA7(numpy_rng=numpy_rng, n_ins=n_ins,
#                   hidden_layers_sizes=hidden_layers_sizes,
#                   n_outs=n_outs, #n_outs_b=n_outs_b,
#                   tau=tau)
        
      
    
        #########################
        # PRETRAINING THE MODEL #
        #########################
        print '... getting the pretraining functions'
        pretraining_fns = sda.pretraining_functions(train_set_x=train_set_x,
                                                    batch_size=batch_size, tau=tau)
        
        #print "pre-training tau = ",tau
        print '... pre-training the model'
        start_time = time.clock()
        ## Pre-train layer-wise
        #corruption_levels = [.2, .3, .3, .3, .3]
        corruption_levels = [.1, .2, .3, .3, .3]
        #corruption_levels = [0, 0, 0]
        pt_trai_costs_vs_stage = []
        for i in xrange(sda.n_layers):
            # go through pretraining epochs
            pt_trai_costs = []
            for epoch in xrange(pretraining_epochs):
                # go through the training set
                c = []
                
                for batch_index in xrange(n_train_batches):
                    c.append(pretraining_fns[i](index=batch_index,
                              corruption=corruption_levels[i],
                              lr=pretrain_lr))
                    
                print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch),
                print numpy.mean(c)
                pt_trai_costs.append(numpy.mean(c))
                #print 'c', c
            pt_trai_costs_vs_stage.append(pt_trai_costs)
            
        end_time = time.clock()
        pt_time = end_time - start_time
        print >> sys.stderr, ('The pretraining code for file ' +
                              os.path.split(__file__)[1] +
                              ' ran for %.2fm' % ((pt_time) / 60.))
        
#         sda_reuse_pt_model = []
#         for para_copy in sda.params_b:
#             sda_reuse_pt_model.append(para_copy.get_value())

        sda_reuse_pt_model = []
        for para_copy in sda.params:
            sda_reuse_pt_model.append(para_copy.get_value())
 
        
        ########################
        # FINETUNING THE MODEL #
        ########################
    
        # get the training, validation and testing function for the model
        print '... getting the finetuning functions'

    
        start_time_ft = time.clock()
        
        if source_reuse_mode == 'Join' and n_outs != n_outs_source:
            datasets = load_dataset(dataset_A,reduce=3,samples=None, frac=training_data_fraction, rng_seed=rng_seed+repetition)
            train_set_x, train_set_y = datasets[0]
            valid_set_x, valid_set_y = datasets[1]
            test_set_x, test_set_y = datasets[2]
        
            # compute number of minibatches for training, validation and testing
            n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
            n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
            n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size
            
            
            
            
        train_fn, validate_model, test_model, test_predictions, test_class_probabilities = sda.build_finetune_functions(
                    datasets=datasets, batch_size=batch_size,
                    learning_rate=finetune_lr)
            
#         train_fn, validate_model, test_model, test_predictions = sda.build_finetune_functions(
#                     datasets=datasets, batch_size=batch_size,
#                     learning_rate=finetune_lr)
        
        
        
        print '... fine-tuning the model'
        
        best_validation_loss, test_score, test_predict, val_epochs,val_epochs_errs, test_epochs, test_epochs_errs, test_class_prob = train_test_mlp(learning_rate=0.01, training_epochs=training_epochs,#1000,
                         dataset=dataset_A, batch_size=batch_size, 
                         n_train_batches=n_train_batches,n_valid_batches=n_valid_batches,n_test_batches=n_test_batches,
                         train_fn=train_fn,validate_model=validate_model,test_model=test_model, test_predictions=test_predictions,
                         test_class_probabilities=test_class_probabilities)
        
        
        #test_class_prob = numpy.array(test_class_prob)
        #print numpy.shape(test_class_prob)
        
        #print test_class_prob[:,0].argmax(axis=0)
#         y_test_class_prob = test_class_prob[:,0]
#         y_test_class = test_class_prob[:,1]
#         import cPickle as pickle
#         pickle.dump(y_test_class_prob, open('bbbc_y_test_class_prob'+str(repetition)+'.pkl', 'wb'))
#         pickle.dump(y_test_class, open('bbbc_y_test_class.pkl', 'wb'))
#         #print test_class_prob
#         print numpy.shape(y_test_class_prob)
#         print y_test_class_prob[0:1, 0:10]
#         print y_test_class[0:1, 0:10]
        
# #         best_validation_loss, test_score, test_predict, val_epochs,val_epochs_errs, test_epochs, test_epochs_errs = train_test_mlp(learning_rate=0.01, training_epochs=training_epochs,#1000,
# #                          dataset=dataset_A, batch_size=batch_size, 
# #                          n_train_batches=n_train_batches,n_valid_batches=n_valid_batches,n_test_batches=n_test_batches,
# #                          train_fn=train_fn,validate_model=validate_model,test_model=test_model, test_predictions=test_predictions)
        
        test_predict = numpy.array(test_predict)
        y_test_pred = test_predict[:,0]
        y_test = test_predict[:,1]
        
        end_time_ft = time.clock()
        ft_time = end_time_ft - start_time_ft
        
        sda_reuse_ft2_model = []
        for para_copy in sda.params:
            #print 'para_copy22.get_value()',para_copy.get_value()
            sda_reuse_ft2_model.append(para_copy.get_value())
        
#         for ids in range(len(sda.params)):
#             a = sda.params[ids].get_value()
#             print 'a',a
#             sda_reuse_ft2_model.append(a)

        sda_reuse_ft_model = sda
        print 'done'
    
    
    ###########################
    # RE-FINETUNING THE MODEL #
    ###########################
    elif retrain == 1:
            
            from scipy.stats import bernoulli
            # numpy random generator
            numpy_rng = numpy.random.RandomState(rng_seed+repetition)
            
            print '... building the model'
            # construct the stacked denoising autoencoder class
            sda = SdA6(numpy_rng=numpy_rng, n_ins=n_ins,
                      hidden_layers_sizes=hidden_layers_sizes,
                      n_outs=n_outs, #n_outs_b=n_outs_b,
                      tau=tau)
            
            if source_reuse_mode == 'R':
                print 'random initialization'
                
            elif source_reuse_mode == 'R+D':
                print 'random initialization with dropout'
                for ids in range(len(sda.params)):
                    a = sda.params[ids].get_value()
                    b = dropout_weights(a, dropout_rate)
                    sda.params[ids].set_value(b)
                    
            elif source_reuse_mode == 'PT':
                print 'restoring source problem pre-training weights'
                if n_outs == n_outs_source:
                    for ids in range(len(sda.params)):
                        sda.params[ids].set_value(sda_reuse_pt_model[ids]) # set the value
                else:
                    for ids in range(len(sda.params)-2):
                        sda.params[ids].set_value(sda_reuse_pt_model[ids]) # set the value
            
            elif source_reuse_mode == 'PT+D':
                print 'restoring source problem pre-training weights with dropout'
                if n_outs == n_outs_source:
                    for ids in range(len(sda.params)):
                        a = sda_reuse_pt_model[ids]
                        b = dropout_weights(a, dropout_rate)
                        sda.params[ids].set_value(b)                       
                else:
                    for ids in range(len(sda.params)-2):
                        a = sda_reuse_pt_model[ids]
                        b = dropout_weights(a, dropout_rate)
                        sda.params[ids].set_value(b)      
            
            elif source_reuse_mode == 'PT+FT':
                print 'restoring source problem fine-tuned weights'
                if n_outs == n_outs_source:
                    for ids in range(len(sda.params)):
                        sda.params[ids].set_value(sda_reuse_ft_model.params[ids].get_value())
                else:
                    for ids in range(len(sda.params)-2):
                        #sda.params[ids].set_value(sda_reuse_ft_model.params[ids].get_value())
                        
                        # FOR BBBC data from float64 to float32
                        a = sda_reuse_ft_model.params[ids].get_value() 
                        b = a.astype(dtype='float32') 
                        sda.params[ids].set_value(b)
            
            elif source_reuse_mode == 'PT+FT+D':
                print 'restoring source problem fine-tuned weights with dropout'
                if n_outs == n_outs_source:
                    for ids in range(len(sda.params)):
                        a = sda_reuse_ft_model.params[ids].get_value()
                        b = dropout_weights(a, dropout_rate)
                        sda.params[ids].set_value(b)
                else:
                    for ids in range(len(sda.params)-2):
                        a = sda_reuse_ft_model.params[ids].get_value()
                        b = dropout_weights(a, dropout_rate)

                        sda.params[ids].set_value(b)
            
                        
            
            start_time_rft = time.clock()
            train_fn, validate_model, test_model, test_predictions, test_class_probabilities = sda.build_finetune_functions(
                    datasets=datasets, batch_size=batch_size,
                    learning_rate=finetune_lr)
            
            print '... fine-tuning the model'
        
            best_validation_loss, test_score, test_predict, val_epochs,val_epochs_errs, test_epochs, test_epochs_errs, test_class_prob = train_test_mlp(learning_rate=0.01, training_epochs=training_epochs,#1000,
                         dataset=dataset_A, batch_size=batch_size, 
                         n_train_batches=n_train_batches,n_valid_batches=n_valid_batches,n_test_batches=n_test_batches,
                         train_fn=train_fn,validate_model=validate_model,test_model=test_model, test_predictions=test_predictions,
                         test_class_probabilities=test_class_probabilities)
            
            
            
#             train_fn, validate_model, test_model, test_predictions = sda.build_finetune_functions_reuse(
#                         datasets=datasets, batch_size=batch_size,
#                         learning_rate=finetune_lr, retrain_ft_layers= retrain_ft_layers)
#             
#             best_validation_loss, test_score, test_predict, val_epochs,val_epochs_errs, test_epochs, test_epochs_errs = train_test_mlp(learning_rate=0.01, training_epochs=training_epochs,
#                      dataset=dataset_A, batch_size=batch_size,
#                      n_train_batches=n_train_batches,n_valid_batches=n_valid_batches,n_test_batches=n_test_batches,
#                      train_fn=train_fn,validate_model=validate_model,test_model=test_model, test_predictions=test_predictions)
            
            end_time_rft = time.clock()
            pt_time = 0
            ft_time = end_time_rft - start_time_rft
            
            test_predict = numpy.array(test_predict)
            y_test_pred = test_predict[:,0]
            y_test = test_predict[:,1]
        
            sda_reuse_ft_model = sda
            sda_reuse_ft2_model = []
            for para_copy in sda.params:
                sda_reuse_ft2_model.append(para_copy.get_value())
            sda_reuse_pt_model= None
            pt_trai_costs_vs_stage = None

    return (sda_reuse_pt_model,sda_reuse_ft2_model, sda_reuse_ft_model,
            best_validation_loss*100, test_score*100, 
            pt_time, ft_time, y_test_pred, y_test, 
            val_epochs,val_epochs_errs, test_epochs, test_epochs_errs,
            pt_trai_costs_vs_stage)
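All of the source_reuse_mode branches above follow the same transfer pattern: copy the source network's parameters into the freshly built target SdA6, and skip the final weight/bias pair (the softmax output layer) whenever the target task has a different number of classes. Below is a minimal sketch of that logic with plain NumPy arrays standing in for the Theano shared variables; transfer_params and the toy shapes are hypothetical and not part of the original code:

import numpy

def transfer_params(source_params, target_params, n_outs, n_outs_source):
    # copy source parameters into the target list, dropping the final W/b
    # pair when the output layer sizes differ (as in the 'PT' and 'PT+FT'
    # reuse modes above); astype mirrors the float64 -> float32 conversion
    n_copy = len(target_params) if n_outs == n_outs_source else len(target_params) - 2
    for ids in range(n_copy):
        target_params[ids] = source_params[ids].astype('float32')
    return target_params

# toy example: one hidden layer (W, b) plus a softmax layer (W, b)
source = [numpy.random.randn(10, 5), numpy.zeros(5),
          numpy.random.randn(5, 3), numpy.zeros(3)]
target = [numpy.zeros((10, 5)), numpy.zeros(5),
          numpy.zeros((5, 2)), numpy.zeros(2)]
target = transfer_params(source, target, n_outs=2, n_outs_source=3)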
Example #3
from load_dataset2 import load_dataset

datasets = load_dataset("bbbc", reduce=5, samples=None, frac=1.0, rng_seed=1234)