Example 1
def predict():
    """
    An example of how to load a trained model and use it
    to predict labels.
    """

    # load the saved model
    classifier = cPickle.load(open('best_model.pkl'))

    # compile a predictor function
    predict_model = theano.function(
        inputs=[classifier.input],
        outputs=classifier.y_pred)

    # We can test it on some examples from the test set
    dataset='mnist.pkl.gz'
    datasets = load_data(dataset)
    test_set_x, test_set_y = datasets[2]
    test_set_x = test_set_x.get_value()

    predicted_values = predict_model(test_set_x[:10])
    print ("Predicted values for the first 10 examples in test set:")
    print predicted_values
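
A minimal way to exercise this function, assuming the snippet lives next to the Theano tutorial code (so that `cPickle`, `theano` and `load_data` are importable) and that `best_model.pkl` was already written by the training run in Example 9, might look like this:

if __name__ == '__main__':
    # hypothetical driver: reload the pickled classifier and label
    # the first 10 MNIST test images
    predict()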
Example 2
def test_GRBM_DBN(finetune_lr=0.1, pretraining_epochs=[225, 75],
                  pretrain_lr=[0.002, 0.02], k=1, weight_decay=0.0002,
                  momentum=0.9, datasets=None, batch_size=128,
                  hidden_layers_sizes=[1024, 1024, 1024],
                  n_ins=784, n_outs=10, filename="../data/DBN.pickle",
                  load=True, save=True, verbose=False, pretraining_start=0,
                  pretraining_stop=-1, finetune=True, saveToDir = None, loadModelFromFile = None):

    # %g keeps the float hyperparameters readable in the folder name
    # (%d would truncate values such as 0.1 to 0)
    folder_name = 'finetune_lr=%g' % finetune_lr + \
                  ' pretraining_epochs=%d-%d' % (pretraining_epochs[0], pretraining_epochs[1]) + \
                  ' pretrain_lr=%g-%g' % (pretrain_lr[0], pretrain_lr[1]) + \
                  ' k=%d' % k + \
                  ' weight_decay=%g' % weight_decay + \
                  ' momentum=%g' % momentum + \
                  ' batch_size=%d' % batch_size + \
                  ' hidden_layers_sizes=%d' % (hidden_layers_sizes[0])
    if not os.path.exists(folder_name):
        os.makedirs(folder_name)

    if datasets is None:
        from load_data_MNIST import load_data
        datasets = load_data()
        
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

    # numpy random generator
    numpy_rng = numpy.random.RandomState()

    #prepare save directory
    if saveToDir is not None:
    
        if saveToDir[-1] != '/':
            saveToDir += '/'

        if os.path.exists(saveToDir):
            timeStr = datetime.datetime.fromtimestamp(time.time()).strftime('%Y_%m_%d_%H_%M_%S')
            saveToDir = saveToDir[:-1]+'_'+timeStr+"/"

        os.makedirs(saveToDir)

    logger = Logger(saveToDir, verbose)
        
    #save run params
    logger.logParameter('Start time', datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))
    logger.logParameter('\nNet parameters','')
    logger.logParameter('pretraining epochs', ', '.join(str(x) for x in pretraining_epochs))
    logger.logParameter('pretraining learning rate', ', '.join(str(x) for x in pretrain_lr))
    logger.logParameter('finetuning learning rate',finetune_lr)
    logger.logParameter('weight decay', weight_decay)
    logger.logParameter('momentum', momentum)
    logger.logParameter('CD-k', k)
    logger.logParameter('inputs count', n_ins)
    logger.logParameter('outputs count', n_outs)
    logger.logParameter('hidden layers sizes', ', '.join(str(x) for x in hidden_layers_sizes))
    logger.logParameter('batch size', batch_size)
    
    loaded = False
    
    if loadModelFromFile is not None:
        logger.logParameter('loading model', loadModelFromFile)
    
        logger.log('... trying to load the model from '+ loadModelFromFile)

        if os.path.isfile(loadModelFromFile):
            dbn = GRBM_DBN.load(loadModelFromFile)
            dbn.update_finetune_cost(weight_decay=weight_decay)

            loaded = True
            logger.log('... model loaded')
        else:
            logger.log('... couldn\'t find the model file')

    
    if not loaded:
        logger.log('... building the model')
        
        # construct the Deep Belief Network
        dbn = GRBM_DBN(numpy_rng=numpy_rng, n_ins=n_ins,
                       hidden_layers_sizes=hidden_layers_sizes,
                       n_outs=n_outs)

        #########################
        # PRETRAINING THE MODEL #
        #########################

        logger.log('... getting the pretraining functions')
        pretraining_fns = dbn.pretraining_functions(train_set_x=train_set_x, batch_size=batch_size, k=k)

        logger.log('... pre-training the model')

        start_time = time.clock()
        ## Pre-train layer-wise

        if pretraining_stop == -1:
            pretraining_stop = dbn.n_layers

        for i in xrange(pretraining_start, pretraining_stop):
            start_time_temp = time.clock()
            if i==0:
                pretrain_lr_new = pretrain_lr[0]
                pretraining_epochs_new = pretraining_epochs[0]
            else:
                pretrain_lr_new = pretrain_lr[1]
                pretraining_epochs_new = pretraining_epochs[1]

            # go through pretraining epochs

            for epoch in xrange(pretraining_epochs_new):

                if verbose:
                    # weights
                    image = Image.fromarray(
                        tile_raster_images(
                            X=dbn.rbm_layers[i].W.get_value(borrow=True).T,
                            img_shape=(28, 28),
                            tile_shape=(10, 10),
                            tile_spacing=(1, 1)
                        )
                    )
                    image.save(folder_name + '/filters_at_layer_%i_epoch_%i.png' % (i, epoch))

                    # probabilities
                    X = valid_set_x[:20].eval()
                    hMean = sigmoid(numpy.dot(X, dbn.rbm_layers[i].W.get_value(borrow=True)) + dbn.rbm_layers[i].hbias.get_value(borrow=True))
                    image = Image.fromarray(hMean * 256)
                    image.save(folder_name + '/probabilities_at_layer_%i_epoch_%i.gif' % (i, epoch))

                # go through the training set
                c = []
                for batch_index in xrange(n_train_batches):
                    c.append(pretraining_fns[i](index=batch_index, lr=pretrain_lr_new))
                
                end_time_temp = time.clock()
                logger.log('Pre-training layer %i, epoch %d, cost %f ' % (i + 1, epoch + 1, numpy.mean(c)) + ' ran for %d sec' % ((end_time_temp - start_time_temp) ))

        end_time = time.clock()
        logger.log('The pretraining code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.))
        if saveToDir:
            logger.log('... saving the model')
            dbn.save(saveToDir+'pretrained_model')
            
    ########################
    # FINETUNING THE MODEL #
    ########################

    if finetune:
        # get the training, validation and testing function for the model
        logger.log('... getting the finetuning functions')
        train_fn, validate_model, test_model = dbn.build_finetune_functions(datasets=datasets, batch_size=batch_size, momentum=momentum)
        logger.log('... finetuning the model')

        best_params = None
        best_validation_loss = numpy.inf
        last_validation_loss = numpy.inf
        test_score = 0.
        start_time = time.clock()
        current_lr = finetune_lr
        done_looping = False
        epoch = 0

        while not done_looping:
            epoch = epoch + 1
            for minibatch_index in xrange(n_train_batches):

                minibatch_avg_cost = train_fn(minibatch_index, current_lr)
                iter = (epoch - 1) * n_train_batches + minibatch_index

            import warnings
            warnings.filterwarnings("ignore")
            validation_losses = validate_model()
            this_validation_loss = numpy.mean(validation_losses)
            
            logger.log('epoch %i, validation error %f %%' % (epoch, this_validation_loss * 100))

            if this_validation_loss < best_validation_loss:
                best_validation_loss = this_validation_loss

            if this_validation_loss > last_validation_loss:
                current_lr /= 2.
                logger.log(('learning rate halved to %f') %(current_lr))

            last_validation_loss = this_validation_loss

            if current_lr < 0.001:
                done_looping = True

        test_losses = test_model()
        test_score = numpy.mean(test_losses)

        end_time = time.clock()
        logger.log('Optimization complete with best validation score of %f %% with test performance %f %%' % (best_validation_loss * 100., test_score * 100.))
        logger.log('The fine tuning code for file ' +os.path.split(__file__)[1] +' ran for %.2fm' % ((end_time - start_time)/ 60.))
        
        if saveToDir:
            logger.log('... saving the final model')
            dbn.save(saveToDir+'final_model')
        return (best_validation_loss * 100., test_score * 100.)

    return (0., 0.)
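
The verbose branch above calls a plain `sigmoid` on NumPy arrays when it saves the hidden-unit probability images, but that helper is not shown in the snippet. A minimal sketch, assuming it is simply the element-wise logistic function:

import numpy

def sigmoid(x):
    # element-wise logistic function; used only to turn pre-activations
    # into probabilities for the saved visualizations
    return 1. / (1. + numpy.exp(-x))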
Example 3
def test_GRBM_DBN(finetune_lr=0.1, pretraining_epochs=[225, 75],
             pretrain_lr=[0.002, 0.02], k=1, weight_decay=0.0002,
             momentum=0.9, datasets=None, batch_size=128,
             hidden_layers_sizes=[1024, 1024, 1024],
             n_ins=784, n_outs=10, filename="../data/DBN.pickle",
             load=True, save=True, verbose=False, pretraining_start=0,
             pretraining_stop=-1, finetune=True):

    if datasets is None:
        from load_data_MNIST import load_data
        datasets = load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

    # numpy random generator
    numpy_rng = numpy.random.RandomState()

    loaded = False

    if load:
        print '... trying to load the model'

        if os.path.isfile(filename):
            dbn = GRBM_DBN.load(filename)
            dbn.update_finetune_cost(weight_decay=weight_decay)
            loaded = True
            print '... model loaded'
        else:
            print '... couldn\'t find the model file'

    if not loaded:
        print '... building the model'
        # construct the Deep Belief Network
        dbn = GRBM_DBN(numpy_rng=numpy_rng, n_ins=n_ins,
                    hidden_layers_sizes=hidden_layers_sizes,
                    n_outs=n_outs)

        #########################
        # PRETRAINING THE MODEL #
        #########################

        print '... getting the pretraining functions'
        pretraining_fns = dbn.pretraining_functions(train_set_x=train_set_x,
                                                    batch_size=batch_size,
                                                    k=k)

        print '... pre-training the model'
        start_time = time.clock()
        ## Pre-train layer-wise

        if pretraining_stop == -1:
            pretraining_stop = dbn.n_layers

        for i in xrange(pretraining_start, pretraining_stop):
            start_time_temp = time.clock()
            if i==0:
                pretrain_lr_new = pretrain_lr[0]
                pretraining_epochs_new = pretraining_epochs[0]
            else:
                pretrain_lr_new = pretrain_lr[1]
                pretraining_epochs_new = pretraining_epochs[1]

            # go through pretraining epochs

            for epoch in xrange(pretraining_epochs_new):
                if verbose:
                    # weights
                    image = Image.fromarray(
                        tile_raster_images(
                            X=dbn.rbm_layers[i].W.get_value(borrow=True).T,
                            img_shape=(28, 28),
                            tile_shape=(10, 10),
                            tile_spacing=(1, 1)
                        )
                    )
                    image.save('filters_at_layer_%i_epoch_%i.png' % (i, epoch))
                    
                    # probabilities
                    X = valid_set_x[:20].eval()
                    hMean = sigmoid(numpy.dot(X, dbn.rbm_layers[i].W.get_value(borrow=True)) + dbn.rbm_layers[i].hbias.get_value(borrow=True))
                    image = Image.fromarray(hMean * 256)
                    image.save('probabilities_at_layer_%i_epoch_%i.gif' % (i, epoch))

                # go through the training set
                c = []
                for batch_index in xrange(n_train_batches):
                    c.append(pretraining_fns[i](index=batch_index,
                                                lr=pretrain_lr_new))
                end_time_temp = time.clock()
                print 'Pre-training layer %i, epoch %d, cost %f ' % (i + 1, epoch + 1, numpy.mean(c)) + ' ran for %d sec' % ((end_time_temp - start_time_temp) )

        end_time = time.clock()
        print >> sys.stderr, ('The pretraining code for file ' +
                              os.path.split(__file__)[1] +
                              ' ran for %.2fm' % ((end_time - start_time) / 60.))

        if save:
            print '... saving the model'
            dbn.save(filename)

    ########################
    # FINETUNING THE MODEL #
    ########################

    if finetune:
        # get the training, validation and testing function for the model
        print '... getting the finetuning functions'
        train_fn, validate_model, test_model = dbn.build_finetune_functions(
                    datasets=datasets, batch_size=batch_size, momentum=momentum)

        print '... finetuning the model'

        best_params = None
        best_validation_loss = numpy.inf
        last_validation_loss = numpy.inf
        test_score = 0.
        start_time = time.clock()
        current_lr = finetune_lr
        done_looping = False
        epoch = 0

        while not done_looping:
            epoch = epoch + 1
            for minibatch_index in xrange(n_train_batches):

                minibatch_avg_cost = train_fn(minibatch_index, current_lr)
                iter = (epoch - 1) * n_train_batches + minibatch_index

            import warnings
            warnings.filterwarnings("ignore")
            validation_losses = validate_model()
            this_validation_loss = numpy.mean(validation_losses)
            print('epoch %i, validation error %f %%' % \
                  (epoch, this_validation_loss * 100.))

            if this_validation_loss < best_validation_loss:
                best_validation_loss = this_validation_loss

            if this_validation_loss > last_validation_loss:
                current_lr /= 2.
                print(('    learning rate halved to %f') %
                      (current_lr))

            last_validation_loss = this_validation_loss

            if current_lr < 0.001:
                done_looping = True

        test_losses = test_model()
        test_score = numpy.mean(test_losses)

        end_time = time.clock()
        print(('Optimization complete with best validation score of %f %%,'
               ' with test performance %f %%') %
                     (best_validation_loss * 100., test_score * 100.))
        print >> sys.stderr, ('The fine tuning code for file ' +
                              os.path.split(__file__)[1] +
                              ' ran for %.2fm' % ((end_time - start_time)
                                                  / 60.))
        
        if save:
            ts = time.time()
            st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d_%H-%M-%S')
            print '... saving the final model'
            dbn.save(re.sub('.pickle$', '', filename) + '_' + st + '.final.pickle')

        return (best_validation_loss * 100., test_score * 100.)

    return (0., 0.)
Example 4
# -*- coding: utf-8 -*-

from GRBM_DBN import test_GRBM_DBN
from GRBM_DBN import GRBM_DBN

from load_data_MNIST import load_data
from load_data_MNIST import load_raw_data

datasets = load_data()

#
#   TRAIN THE NETWORK
#
# test_GRBM_DBN returns (validation score, test score)
val_score, test_score = test_GRBM_DBN(finetune_lr=0.1, pretraining_epochs=[1, 1], pretrain_lr=[0.002, 0.02], k=1, weight_decay=0.0002,
                momentum=0.9, batch_size=128, datasets=datasets, hidden_layers_sizes=[784,784], finetune = False,
                saveToDir = '../results/MNIST/', loadModelFromFile = '', verbose = True)

            

#
# USE THE TRAINED NETWORK
#

dbn = GRBM_DBN.load('../results/MNIST/pretrained_model')

train_set, valid_set, test_set = load_raw_data()

# classify the first 13 patterns
print dbn.classify(train_set[0][:13])
# true classes of the first 13 patterns
print train_set[1][:13]
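
Beyond eyeballing a handful of predictions, a quick accuracy figure can be computed; the sketch below assumes `classify()` returns a NumPy array of predicted class labels for a matrix of input rows:

import numpy

# hypothetical accuracy check on the first 1000 training patterns
preds = dbn.classify(train_set[0][:1000])
print 'accuracy: %.2f%%' % (100. * numpy.mean(preds == train_set[1][:1000]))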
Example 5
def test_GRBM_DBN(finetune_lr=0.1,
                  pretraining_epochs=[225, 75],
                  pretrain_lr=[0.002, 0.02],
                  k=1,
                  weight_decay=0.0002,
                  momentum=0.9,
                  datasets=None,
                  batch_size=128,
                  hidden_layers_sizes=[1024, 1024, 1024],
                  n_ins=784,
                  n_outs=10,
                  filename="../data/DBN.pickle",
                  load=True,
                  save=True,
                  verbose=False,
                  pretraining_start=0,
                  pretraining_stop=-1,
                  finetune=True):

    if datasets is None:
        from load_data_MNIST import load_data
        datasets = load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

    # numpy random generator
    numpy_rng = numpy.random.RandomState()

    loaded = False

    if load:
        print '... trying to load the model'

        if os.path.isfile(filename):
            dbn = GRBM_DBN.load(filename)
            dbn.update_finetune_cost(weight_decay=weight_decay)
            loaded = True
            print '... model loaded'
        else:
            print '... couldn\'t find the model file'

    if not loaded:
        print '... building the model'
        # construct the Deep Belief Network
        dbn = GRBM_DBN(numpy_rng=numpy_rng,
                       n_ins=n_ins,
                       hidden_layers_sizes=hidden_layers_sizes,
                       n_outs=n_outs)

        #########################
        # PRETRAINING THE MODEL #
        #########################

        print '... getting the pretraining functions'
        pretraining_fns = dbn.pretraining_functions(train_set_x=train_set_x,
                                                    batch_size=batch_size,
                                                    k=k)

        print '... pre-training the model'
        start_time = time.clock()
        ## Pre-train layer-wise

        if pretraining_stop == -1:
            pretraining_stop = dbn.n_layers

        for i in xrange(pretraining_start, pretraining_stop):
            start_time_temp = time.clock()
            if i == 0:
                pretrain_lr_new = pretrain_lr[0]
                pretraining_epochs_new = pretraining_epochs[0]
            else:
                pretrain_lr_new = pretrain_lr[1]
                pretraining_epochs_new = pretraining_epochs[1]

            # go through pretraining epochs

            for epoch in xrange(pretraining_epochs_new):
                if verbose:
                    # weights
                    image = Image.fromarray(
                        tile_raster_images(
                            X=dbn.rbm_layers[i].W.get_value(borrow=True).T,
                            img_shape=(28, 28),
                            tile_shape=(10, 10),
                            tile_spacing=(1, 1)))
                    image.save('filters_at_layer_%i_epoch_%i.png' % (i, epoch))

                    # probabilities
                    X = valid_set_x[:20].eval()
                    hMean = sigmoid(
                        numpy.dot(X, dbn.rbm_layers[i].W.get_value(
                            borrow=True)) +
                        dbn.rbm_layers[i].hbias.get_value(borrow=True))
                    image = Image.fromarray(hMean * 256)
                    image.save('probabilities_at_layer_%i_epoch_%i.gif' %
                               (i, epoch))

                # go through the training set
                c = []
                for batch_index in xrange(n_train_batches):
                    c.append(pretraining_fns[i](index=batch_index,
                                                lr=pretrain_lr_new))
                end_time_temp = time.clock()
                print 'Pre-training layer %i, epoch %d, cost %f ' % (
                    i + 1, epoch + 1, numpy.mean(c)) + ' ran for %d sec' % (
                        (end_time_temp - start_time_temp))

        end_time = time.clock()
        print >> sys.stderr, ('The pretraining code for file ' +
                              os.path.split(__file__)[1] + ' ran for %.2fm' %
                              ((end_time - start_time) / 60.))

        if save:
            print '... saving the model'
            dbn.save(filename)

    ########################
    # FINETUNING THE MODEL #
    ########################

    if finetune:
        # get the training, validation and testing function for the model
        print '... getting the finetuning functions'
        train_fn, validate_model, test_model = dbn.build_finetune_functions(
            datasets=datasets, batch_size=batch_size, momentum=momentum)

        print '... finetuning the model'

        best_params = None
        best_validation_loss = numpy.inf
        last_validation_loss = numpy.inf
        test_score = 0.
        start_time = time.clock()
        current_lr = finetune_lr
        done_looping = False
        epoch = 0

        while not done_looping:
            epoch = epoch + 1
            for minibatch_index in xrange(n_train_batches):

                minibatch_avg_cost = train_fn(minibatch_index, current_lr)
                iter = (epoch - 1) * n_train_batches + minibatch_index

            import warnings
            warnings.filterwarnings("ignore")
            validation_losses = validate_model()
            this_validation_loss = numpy.mean(validation_losses)
            print('epoch %i, validation error %f %%' % \
                  (epoch, this_validation_loss * 100.))

            if this_validation_loss < best_validation_loss:
                best_validation_loss = this_validation_loss

            if this_validation_loss > last_validation_loss:
                current_lr /= 2.
                print(('    learning rate halved to %f') % (current_lr))

            last_validation_loss = this_validation_loss

            if current_lr < 0.001:
                done_looping = True

        test_losses = test_model()
        test_score = numpy.mean(test_losses)

        end_time = time.clock()
        print(('Optimization complete with best validation score of %f %%,'
               ' with test performance %f %%') %
              (best_validation_loss * 100., test_score * 100.))
        print >> sys.stderr, ('The fine tuning code for file ' +
                              os.path.split(__file__)[1] + ' ran for %.2fm' %
                              ((end_time - start_time) / 60.))

        if save:
            ts = time.time()
            st = datetime.datetime.fromtimestamp(ts).strftime(
                '%Y-%m-%d_%H-%M-%S')
            print '... saving the final model'
            dbn.save(
                re.sub('.pickle$', '', filename) + '_' + st + '.final.pickle')

        return (best_validation_loss * 100., test_score * 100.)

    return (0., 0.)
Example 6
def test_rbm(learning_rate=0.1, training_epochs=15,
             dataset='mnist.pkl.gz', batch_size=20,
             n_chains=20, n_samples=10, output_folder='rbm_plots',
             n_hidden=500):
    """
    Demonstrate how to train an RBM and afterwards sample from it using Theano.

    This is demonstrated on MNIST.

    :param learning_rate: learning rate used for training the RBM

    :param training_epochs: number of epochs used for training

    :param dataset: path to the pickled dataset

    :param batch_size: size of a batch used to train the RBM

    :param n_chains: number of parallel Gibbs chains to be used for sampling

    :param n_samples: number of samples to plot for each chain

    """
    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()    # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images

    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    # initialize storage for the persistent chain (state = hidden
    # layer of chain)
    persistent_chain = theano.shared(numpy.zeros((batch_size, n_hidden),
                                                 dtype=theano.config.floatX),
                                     borrow=True)

    # construct the RBM class
    rbm = RBM(input=x, n_visible=28 * 28,
              n_hidden=n_hidden, numpy_rng=rng, theano_rng=theano_rng)

    # get the cost and the gradient corresponding to one step of CD-15
    cost, updates = rbm.get_cost_updates(lr=learning_rate,
                                         persistent=persistent_chain, k=15)

    #################################
    #     Training the RBM          #
    #################################
    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)
    os.chdir(output_folder)

    # it is ok for a theano function to have no output
    # the purpose of train_rbm is solely to update the RBM parameters
    train_rbm = theano.function([index], cost,
           updates=updates,
           givens={x: train_set_x[index * batch_size:
                                  (index + 1) * batch_size]},
           name='train_rbm')

    plotting_time = 0.
    start_time = time.clock()

    # go through training epochs
    for epoch in xrange(training_epochs):

        # go through the training set
        mean_cost = []
        for batch_index in xrange(n_train_batches):
            mean_cost += [train_rbm(batch_index)]

        print 'Training epoch %d, cost is ' % epoch, numpy.mean(mean_cost)

        # Plot filters after each training epoch
        plotting_start = time.clock()
        # Construct image from the weight matrix
        image = PIL.Image.fromarray(tile_raster_images(
                 X=rbm.W.get_value(borrow=True).T,
                 img_shape=(28, 28), tile_shape=(10, 10),
                 tile_spacing=(1, 1)))
        image.save('filters_at_epoch_%i.png' % epoch)
        plotting_stop = time.clock()
        plotting_time += (plotting_stop - plotting_start)

    end_time = time.clock()

    pretraining_time = (end_time - start_time) - plotting_time

    print ('Training took %f minutes' % (pretraining_time / 60.))

    #################################
    #     Sampling from the RBM     #
    #################################
    # find out the number of test samples
    number_of_test_samples = test_set_x.get_value(borrow=True).shape[0]

    # pick random test examples, with which to initialize the persistent chain
    test_idx = rng.randint(number_of_test_samples - n_chains)
    persistent_vis_chain = theano.shared(numpy.asarray(
            test_set_x.get_value(borrow=True)[test_idx:test_idx + n_chains],
            dtype=theano.config.floatX))

    plot_every = 1000
    # define one step of Gibbs sampling (mf = mean-field) define a
    # function that does `plot_every` steps before returning the
    # sample for plotting
    [presig_hids, hid_mfs, hid_samples, presig_vis,
     vis_mfs, vis_samples], updates =  \
                        theano.scan(rbm.gibbs_vhv,
                                outputs_info=[None,  None, None, None,
                                              None, persistent_vis_chain],
                                n_steps=plot_every)

    # add to updates the shared variable that takes care of our persistent
    # chain :.
    updates.update({persistent_vis_chain: vis_samples[-1]})
    # construct the function that implements our persistent chain.
    # we generate the "mean field" activations for plotting and the actual
    # samples for reinitializing the state of our persistent chain
    sample_fn = theano.function([], [vis_mfs[-1], vis_samples[-1]],
                                updates=updates,
                                name='sample_fn')

    # create a space to store the image for plotting ( we need to leave
    # room for the tile_spacing as well)
    image_data = numpy.zeros((29 * n_samples + 1, 29 * n_chains - 1),
                             dtype='uint8')
    for idx in xrange(n_samples):
        # generate `plot_every` intermediate samples that we discard,
        # because successive samples in the chain are too correlated
        vis_mf, vis_sample = sample_fn()
        print ' ... plotting sample ', idx
        image_data[29 * idx:29 * idx + 28, :] = tile_raster_images(
                X=vis_mf,
                img_shape=(28, 28),
                tile_shape=(1, n_chains),
                tile_spacing=(1, 1))
        # construct image

    image = PIL.Image.fromarray(image_data)
    image.save('samples.png')
    os.chdir('../')
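
For a quick smoke test rather than the full 15-epoch run, the function can be called with reduced settings; the values below are hypothetical and chosen only to keep the run short:

if __name__ == '__main__':
    # hypothetical quick run: one epoch, fewer chains and samples
    test_rbm(training_epochs=1, n_chains=10, n_samples=2,
             output_folder='rbm_plots_quick')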
Example 7
def test_rbm(learning_rate=0.1, training_epochs=15,
             dataset='mnist.pkl.gz', batch_size=20,
             n_chains=20, n_samples=10, output_folder='rbm_plots',
             n_hidden=500):
    """
    Demonstrate how to train an RBM and afterwards sample from it using Theano.

    This is demonstrated on MNIST.

    :param learning_rate: learning rate used for training the RBM

    :param training_epochs: number of epochs used for training

    :param dataset: path to the pickled dataset

    :param batch_size: size of a batch used to train the RBM

    :param n_chains: number of parallel Gibbs chains to be used for sampling

    :param n_samples: number of samples to plot for each chain

    """
    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()    # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images

    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    # initialize storage for the persistent chain (state = hidden
    # layer of chain)
    persistent_chain = theano.shared(numpy.zeros((batch_size, n_hidden),
                                                 dtype=theano.config.floatX),
                                     borrow=True)

    # construct the RBM class
    rbm = RBM(input=x, n_visible=28 * 28,
              n_hidden=n_hidden, numpy_rng=rng, theano_rng=theano_rng)

    # get the cost and the gradient corresponding to one step of CD-15
    cost, updates = rbm.get_cost_updates(lr=learning_rate,
                                         persistent=persistent_chain, k=15)

    #################################
    #     Training the RBM          #
    #################################
    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)
    os.chdir(output_folder)

    # it is ok for a theano function to have no output
    # the purpose of train_rbm is solely to update the RBM parameters
    train_rbm = theano.function([index], cost,
           updates=updates,
           givens={x: train_set_x[index * batch_size:
                                  (index + 1) * batch_size]},
           name='train_rbm')

    plotting_time = 0.
    start_time = time.clock()

    # go through training epochs
    for epoch in xrange(training_epochs):

        # go through the training set
        mean_cost = []
        for batch_index in xrange(n_train_batches):
            mean_cost += [train_rbm(batch_index)]

        print 'Training epoch %d, cost is ' % epoch, numpy.mean(mean_cost)

        # Plot filters after each training epoch
        plotting_start = time.clock()
        # Construct image from the weight matrix
        image = PIL.Image.fromarray(tile_raster_images(
                 X=rbm.W.get_value(borrow=True).T,
                 img_shape=(28, 28), tile_shape=(10, 10),
                 tile_spacing=(1, 1)))
        image.save('filters_at_epoch_%i.png' % epoch)
        plotting_stop = time.clock()
        plotting_time += (plotting_stop - plotting_start)

    end_time = time.clock()

    pretraining_time = (end_time - start_time) - plotting_time

    print ('Training took %f minutes' % (pretraining_time / 60.))

    #################################
    #     Sampling from the RBM     #
    #################################
    # find out the number of test samples
    number_of_test_samples = test_set_x.get_value(borrow=True).shape[0]

    # pick random test examples, with which to initialize the persistent chain
    test_idx = rng.randint(number_of_test_samples - n_chains)
    persistent_vis_chain = theano.shared(numpy.asarray(
            test_set_x.get_value(borrow=True)[test_idx:test_idx + n_chains],
            dtype=theano.config.floatX))

    plot_every = 1000
    # define one step of Gibbs sampling (mf = mean-field) define a
    # function that does `plot_every` steps before returning the
    # sample for plotting
    [presig_hids, hid_mfs, hid_samples, presig_vis,
     vis_mfs, vis_samples], updates =  \
                        theano.scan(rbm.gibbs_vhv,
                                outputs_info=[None,  None, None, None,
                                              None, persistent_vis_chain],
                                n_steps=plot_every)

    # add to updates the shared variable that takes care of our persistent
    # chain :.
    updates.update({persistent_vis_chain: vis_samples[-1]})
    # construct the function that implements our persistent chain.
    # we generate the "mean field" activations for plotting and the actual
    # samples for reinitializing the state of our persistent chain
    sample_fn = theano.function([], [vis_mfs[-1], vis_samples[-1]],
                                updates=updates,
                                name='sample_fn')

    # create a space to store the image for plotting ( we need to leave
    # room for the tile_spacing as well)
    image_data = numpy.zeros((29 * n_samples + 1, 29 * n_chains - 1),
                             dtype='uint8')
    for idx in xrange(n_samples):
        # generate `plot_every` intermediate samples that we discard,
        # because successive samples in the chain are too correlated
        vis_mf, vis_sample = sample_fn()
        print ' ... plotting sample ', idx
        image_data[29 * idx:29 * idx + 28, :] = tile_raster_images(
                X=vis_mf,
                img_shape=(28, 28),
                tile_shape=(1, n_chains),
                tile_spacing=(1, 1))
        # construct image

    image = PIL.Image.fromarray(image_data)
    image.save('samples.png')
    os.chdir('../')
Example 8
from GRBM_DBN import test_GRBM_DBN
from load_data_MNIST import load_data

LAYER_SIZE = [256, 512, 1024]
N_LAYERS = [2, 3, 4]
ITERATIONS = 5

datasets = load_data()

for _ in range(ITERATIONS):
    for n_layers in N_LAYERS:
        for layer_size in LAYER_SIZE:
            # test_GRBM_DBN returns (validation score, test score)
            val_score, test_score = test_GRBM_DBN(finetune_lr=0.1, pretraining_epochs=[225, 75],
                pretrain_lr=[0.002, 0.02], k=1, weight_decay=0.0002,
                momentum=0.9, batch_size=128, datasets=datasets,
                hidden_layers_sizes=n_layers*[layer_size], load=False,
                filename=('../data/MNIST_%d_%d.pickle'%(layer_size, n_layers)))

            log = '../data/MNIST.log'
            with open(log, 'a') as f:
                f.write('LAYER_SIZE=%d, n_layers=%d, test_score=%f%%, val_score=%f%%\n' % (layer_size, n_layers, test_score, val_score))

Example 9
def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000,
                           dataset='mnist.pkl.gz',
                           batch_size=600):
    """
    Demonstrate stochastic gradient descent optimization of a log-linear
    model

    This is demonstrated on MNIST.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: the path of the MNIST dataset file from
                 http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz

    """
    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # generate symbolic variables for input (x and y represent a
    # minibatch)
    x = T.matrix('x')  # data, presented as rasterized images
    y = T.ivector('y')  # labels, presented as 1D vector of [int] labels

    # construct the logistic regression class
    # Each MNIST image has size 28*28
    classifier = LogisticRegression(input=x, n_in=28 * 28, n_out=10)

    # the cost we minimize during training is the negative log likelihood of
    # the model in symbolic format
    cost = classifier.negative_log_likelihood(y)

    # compiling a Theano function that computes the mistakes that are made by
    # the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # compute the gradient of cost with respect to theta = (W,b)
    g_W = T.grad(cost=cost, wrt=classifier.W)
    g_b = T.grad(cost=cost, wrt=classifier.b)

    # start-snippet-3
    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs.
    updates = [(classifier.W, classifier.W - learning_rate * g_W),
               (classifier.b, classifier.b - learning_rate * g_b)]

    # compiling a Theano function `train_model` that returns the cost, but in
    # the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    # end-snippet-3

    ###############
    # TRAIN MODEL #
    ###############
    print '... training the model'
    # early-stopping parameters
    patience = 5000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                                  # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                  # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = timeit.default_timer()

    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            minibatch_avg_cost = train_model(minibatch_index)
            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i)
                                     for i in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)

                print(
                    'epoch %i, minibatch %i/%i, validation error %f %%' %
                    (
                        epoch,
                        minibatch_index + 1,
                        n_train_batches,
                        this_validation_loss * 100.
                    )
                )

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    # test it on the test set

                    test_losses = [test_model(i)
                                   for i in xrange(n_test_batches)]
                    test_score = numpy.mean(test_losses)

                    print(
                        (
                            '     epoch %i, minibatch %i/%i, test error of'
                            ' best model %f %%'
                        ) %
                        (
                            epoch,
                            minibatch_index + 1,
                            n_train_batches,
                            test_score * 100.
                        )
                    )

                    # save the best model
                    with open('best_model.pkl', 'w') as f:
                        cPickle.dump(classifier, f)

            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print(
        (
            'Optimization complete with best validation score of %f %%,'
            ' with test performance %f %%'
        )
        % (best_validation_loss * 100., test_score * 100.)
    )
    print 'The code ran for %d epochs, with %f epochs/sec' % (
        epoch, 1. * epoch / (end_time - start_time))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.1fs' % ((end_time - start_time)))
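
Run end to end, this training function writes the `best_model.pkl` that Example 1 later loads; a minimal driver, assuming both functions live in the same script, might be:

if __name__ == '__main__':
    sgd_optimization_mnist()
    # reload best_model.pkl and print predictions for 10 test images (Example 1)
    predict()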