Beispiel #1
0
def build_model(trainval_set, options):
    """Build a fresh SdA, or prepare a previously trained one for reuse.

    Parameters
    ----------
    trainval_set : pair ``(train_set_x, train_set_y)``
        Only ``train_set_x`` is used, and only when ``options['retrain'] == 0``
        (it is handed to ``sda.pretraining_functions``).
    options : dict
        Always read: ``'retrain'``.
        Fresh model (``retrain == 0``): ``'verbose'``, ``'numpy_rng'``,
        ``'theano_rng'``, ``'ndim'``, ``'hlayers'``, ``'nclasses'`` and
        ``'batchsize'``.
        Reused model (any other value): ``'sda_reuse_model'``,
        ``'nclasses_source'`` and ``'nclasses'``.

    Returns
    -------
    tuple
        ``(sda, pretraining_fns)``.  ``pretraining_fns`` is ``None`` when an
        existing model is reused.
    """
    if options['retrain'] == 0:
        if options['verbose'] > 4:
            print >> sys.stderr, ('... building the model')

        # The labels are not needed to build or pre-train the network.
        train_set_x, _ = trainval_set

        # construct the stacked denoising autoencoder class
        sda = SdA(numpy_rng=options['numpy_rng'],
                  theano_rng=options['theano_rng'],
                  n_ins=options['ndim'],
                  hidden_layers_sizes=options['hlayers'],
                  n_outs=options['nclasses'],
                  n_outs_b=options['nclasses'],
                  tau=None)

        if options['verbose'] > 4:
            print >> sys.stderr, ('... getting the pretraining functions')
        pretraining_fns = sda.pretraining_functions(
            train_set_x=train_set_x,
            batch_size=options['batchsize'],
            tau=None)
    else:
        sda = options['sda_reuse_model']

        # Snapshot every value of ``params`` before writing anything, then
        # copy the snapshot position by position into ``params_b``.
        snapshot = [param.get_value() for param in sda.params]
        for position, value in enumerate(snapshot):
            sda.params_b[position].set_value(value)

        # First dimension of the second-to-last entry of ``params_b``.
        # NOTE(review): presumably the input width of the logistic layer's
        # weight matrix -- confirm against SdA.
        n_outs = sda.params_b[-2].get_value().shape[0]
        if options['nclasses_source'] != options['nclasses']:
            print >> sys.stderr, ("Droping logistic layer...")
            sda.change_lastlayer(n_outs, options['nclasses'])

        # No pre-training functions are built for a reused model.
        pretraining_fns = None

    return (sda, pretraining_fns)
def test_DimentionalReduction(dataset='mnist.pkl.gz',
                              pretraining_epochs=50,
                              pretrain_lr=0.01,
                              batch_size=5):
    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]
    numpy_rng = numpy.random.RandomState(89677)

    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size

    print '... building the model'
    # construct the stacked denoising autoencoder class
    sda = SdA(numpy_rng=numpy_rng,
              n_ins=28 * 28,
              hidden_layers_sizes=[300, 50, 2],
              n_outs=2)
    print '... getting the pretraining functions'
    pretraining_fns = sda.pretraining_functions(train_set_x=train_set_x,
                                                batch_size=batch_size)
    corruption_levels = [0., 0., 0.]
    for i in xrange(sda.n_layers):
        # go through pretraining epochs
        for epoch in xrange(pretraining_epochs):
            # go through the training set
            c = []
            for batch_index in xrange(n_train_batches):
                c.append(pretraining_fns[i](index=batch_index,
                                            corruption=corruption_levels[i],
                                            lr=pretrain_lr))
            print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch),
            print numpy.mean(c)
    target = train_set_x.get_value()
    for dA_layer in sda.dA_layers:
        hidden_values_function = dA_layer.get_hidden_values2(sda.x)
        result_function = theano.function(inputs=[sda.x],
                                          outputs=hidden_values_function)
        target = result_function(target)
        print target

    colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k', 'w']
    n = 0
    for x, y in zip(target, train_set_y.eval()):
        if y < len(colors):
            plt.scatter(x[0], x[1], c=colors[y])
            n += 1

        if n > 2000:
            break

    plt.show()
def test_DimentionalReduction(dataset='mnist.pkl.gz', pretraining_epochs=50, pretrain_lr=0.01, batch_size=5):
    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]
    numpy_rng = numpy.random.RandomState(89677)
    
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size
    
    print '... building the model'
    # construct the stacked denoising autoencoder class
    sda = SdA(
        numpy_rng=numpy_rng,
        n_ins=28 * 28,
        hidden_layers_sizes=[300, 50, 2],
        n_outs=2
    )
    print '... getting the pretraining functions'
    pretraining_fns = sda.pretraining_functions(train_set_x=train_set_x,
                                                batch_size=batch_size)
    corruption_levels = [0., 0., 0.]
    for i in xrange(sda.n_layers):
        # go through pretraining epochs
        for epoch in xrange(pretraining_epochs):
            # go through the training set
            c = []
            for batch_index in xrange(n_train_batches):
                c.append(pretraining_fns[i](index=batch_index,
                         corruption=corruption_levels[i],
                         lr=pretrain_lr))
            print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch),
            print numpy.mean(c)
    target = train_set_x.get_value()
    for dA_layer in sda.dA_layers:
        hidden_values_function = dA_layer.get_hidden_values2(sda.x)
        result_function = theano.function(inputs=[sda.x],outputs=hidden_values_function)
        target = result_function(target)
        print target

    colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k','w']
    n = 0
    for x,y in zip(target,train_set_y.eval()):
        if y < len(colors):
            plt.scatter(x[0], x[1],c=colors[y])
            n += 1
            
        if n > 2000:
            break
        
    plt.show()
Beispiel #4
0
    def new(cls, n_ins, hidden_layers_sizes, n_outs, output_folder=None):
        """Build a new SdA and wrap it in an instance of ``cls``.

        The network is created with a ``numpy.random.RandomState`` seeded
        with the fixed value 89677.

        NOTE(review): the first parameter is ``cls`` but no ``@classmethod``
        decorator is visible in this excerpt -- confirm it exists.

        :param n_ins: forwarded to ``SdA`` as ``n_ins``.
        :param hidden_layers_sizes: forwarded to ``SdA``.
        :param n_outs: forwarded to ``SdA`` as ``n_outs``.
        :param output_folder: second positional argument given to ``cls``.
        :return: ``cls(sda, output_folder)``.
        """
        seeded_rng = numpy.random.RandomState(89677)
        network = SdA(numpy_rng=seeded_rng, n_ins=n_ins,
                      hidden_layers_sizes=hidden_layers_sizes, n_outs=n_outs)
        return cls(network, output_folder)
def run_sda(datasets=None, batch_size=100,
            window_size=7, n_principle=3,
            pretraining_epochs=2000, pretrain_lr=0.02,
            training_epochs=10000,  finetune_lr=0.008, 
            hidden_layers_sizes=[310, 100], corruption_levels = [0., 0.]):
    """
    This function maps spatial PCs to a deep representation.
    
    Parameters:
    datasets:           A list containing 3 tuples. Each tuple have 2 entries, 
                        which are theano.shared variables. They stands for train,
                        valid, test data.
    batch_size:         Batch size.
    pretraining_epochs: Pretraining epoches.
    pretrain_lr:        Pretraining learning rate.
    training_epochs:    Fine-tuning epoches.
    finetune_lr:        Fine-tuning learning rate.
    hidden_layers_sizes:A list containing integers. Each intger specifies a size
                        of a hidden layer.
    corruption_levels:  A list containing floats in the inteval [0, 1]. Each 
                        number specifies the corruption level of its corresponding
                        hidden layer.

    Return:
    spatial_rep:        2-D numpy.array. Deep representation for each spatial sample.
    test_score:         Accuracy this representations yield on the trained SdA.
    """
    
    print 'finetuning learning rate=', finetune_lr
    print 'pretraining learning rate=', pretrain_lr
    print 'pretraining epoches=', pretraining_epochs
    print 'fine tuning epoches=', training_epochs
    print 'batch size=', batch_size
    print 'hidden layers sizes=', hidden_layers_sizes
    print 'corruption levels=', corruption_levels

    # compute number of minibatches for training, validation and testing
    n_train_batches = datasets[0][0].get_value(borrow=True).shape[0]
    n_train_batches /= batch_size

    # numpy random generator
    numpy_rng = numpy.random.RandomState(89677)
    print '... building the model'
    # construct the stacked denoising autoencoder class
    sda = SdA(numpy_rng=numpy_rng, n_ins=datasets[0][0].get_value(borrow=True).shape[1],
              hidden_layers_sizes=hidden_layers_sizes,
              n_outs=gnd_img.max())

    ################################################################################
                               # PRETRAINING THE MODEL #
                               #########################

    print '... getting the pretraining functions'
    pretraining_fns = sda.pretraining_functions(train_set_x=datasets[0][0],
                                                batch_size=batch_size)

    print '... pre-training the model'
    start_time = time.clock()
    ## Pre-train layer-wise
    for i in xrange(sda.n_layers):
        # go through pretraining epochs
        for epoch in xrange(pretraining_epochs):
            # go through the training set
            c = []
            for batch_index in xrange(n_train_batches):
                c.append(pretraining_fns[i](index=batch_index,
                         corruption=corruption_levels[i],
                         lr=pretrain_lr))
            
            if epoch % 100 == 0:
                print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch),
                print numpy.mean(c)

    end_time = time.clock()

    print >> sys.stderr, ('The pretraining code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))

    ################################################################################
                                # FINETUNING THE MODEL #
                                ########################

    # get the training, validation and testing function for the model
    print '... getting the finetuning functions'
    train_fn, validate_model, test_model = sda.build_finetune_functions(
        datasets=datasets, batch_size=batch_size,
        learning_rate=finetune_lr)

    print '... finetunning the model'
    # early-stopping parameters
    patience = 100 * n_train_batches  # look as this many examples regardless
    patience_increase = 2.  # wait this much longer when a new best is
                            # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(10 * n_train_batches, patience / 2)
                            # go through this many
                            # minibatche before checking the network
                            # on the validation set; in this case we
                            # check every epoch

    best_params = None
    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0

    while (epoch < training_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            minibatch_avg_cost = train_fn(minibatch_index)
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                validation_losses = validate_model()
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if (this_validation_loss < best_validation_loss *
                        improvement_threshold):
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = test_model()
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

    end_time = time.clock()
    print(('Optimization complete with best validation score of %f %%,'
           'with test performance %f %%') %
                (best_validation_loss * 100., test_score * 100.))
    print >> sys.stdout, ('The training code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))

    # keep the following line consistent with line 227, function "prepare_data"
    filename = 'pavia_l1sda_pt%d_ft%d_lrp%.4f_f%.4f_bs%d_pca%d_ws%d' % \
                (pretraining_epochs, training_epochs, pretrain_lr, finetune_lr, 
                 batch_size, n_principle, window_size) 

    print '... saving parameters'
    sda.save_params(filename + '_params.pkl')

    print '... classifying test set with learnt model:'
    pred_func = theano.function(inputs=[sda.x], outputs=sda.logLayer.y_pred)
    pred_test = pred_func(datasets[2][0].get_value(borrow=True))
    true_test = datasets[2][1].get_value(borrow=True)
    true_valid = datasets[1][1].get_value(borrow=True)
    true_train = datasets[0][1].get_value(borrow=True)
    result_analysis(pred_test, true_train, true_valid, true_test)

    print '... classifying the whole image with learnt model:'
    print '...... extracting data'
    data_spectral, data_spatial, _, _ = \
        T_pca_constructor(hsi_img=img, gnd_img=gnd_img, n_principle=n_principle, 
                          window_size=window_size, flag='unsupervised', 
                          merge=True)
    
    start_time = time.clock()
    print '...... begin '
    y = pred_func(data_spectral) + 1
    print '...... done '
    end_time = time.clock()
    print 'finished, running time:%fs' % (end_time - start_time)

    y_rgb = cmap[y, :]
    margin = (window_size / 2) * 2  # floor it to a multiple of 2
    y_image = y_rgb.reshape(width - margin, height - margin, 3)
    scipy.misc.imsave(filename + 'wholeimg.png' , y_image)
    print 'Saving classification results'
    sio.savemat(filename + 'wholeimg.mat', 
                {'y': y.reshape(width - margin, height - margin)})
    
    ############################################################################
    print '... performing Student\'s t-test'
    best_c = 10000.
    best_g = 10.
    svm_classifier = svm.SVC(C=best_c, gamma=best_g, kernel='rbf')
    svm_classifier.fit(datasets[0][0].get_value(), datasets[0][1].get_value())

    data = [numpy.vstack((datasets[1][0].get_value(),
                          datasets[2][0].get_value())),
            numpy.hstack((datasets[1][1].get_value(),
                          datasets[2][1].get_value()))]
    numpy_rng = numpy.random.RandomState(89677)
    num_test = 100
    print 'Total number of tests: %d' % num_test
    k_sae = []
    k_svm = []
    for i in xrange(num_test):
        [_, _], [_, _], [test_x, test_y], _ = \
        train_valid_test(data, ratio=[0, 1, 1], batch_size=1, 
                         random_state=numpy_rng.random_integers(1e10))
        test_y = test_y + 1 # fix the label scale problem
        pred_y = pred_func(test_x)
        cm = confusion_matrix(test_y, pred_y)
        pr_a = cm.trace()*1.0 / test_y.size
        pr_e = ((cm.sum(axis=0)*1.0/test_y.size) * \
                (cm.sum(axis=1)*1.0/test_y.size)).sum()
        k_sae.append( (pr_a - pr_e) / (1 - pr_e) )

        pred_y = svm_classifier.predict(test_x)
        cm = confusion_matrix(test_y, pred_y)
        pr_a = cm.trace()*1.0 / test_y.size
        pr_e = ((cm.sum(axis=0)*1.0/test_y.size) * \
                (cm.sum(axis=1)*1.0/test_y.size)).sum()
        k_svm.append( (pr_a - pr_e) / (1 - pr_e) )

    std_k_sae = numpy.std(k_sae)
    std_k_svm = numpy.std(k_svm)
    mean_k_sae = numpy.mean(k_sae)
    mean_k_svm = numpy.mean(k_svm)
    left =    ( (mean_k_sae - mean_k_svm) * numpy.sqrt(num_test*2-2)) \
            / ( numpy.sqrt(2./num_test) * num_test * (std_k_sae**2 + std_k_svm**2) )

    rv = t(num_test*2.0 - 2)
    right = rv.ppf(0.95)

    print '\tstd\t\tmean'
    print 'k_sae\t%f\t%f' % (std_k_sae, mean_k_sae)
    print 'k_svm\t%f\t%f' % (std_k_svm, mean_k_svm)
    if left > right:
        print 'left = %f, right = %f, test PASSED.' % (left, right)
    else:
        print 'left = %f, right = %f, test FAILED.' % (left, right)
    
    
    return test_score
Beispiel #6
0
def run_sda(datasets=None,
            batch_size=100,
            window_size=7,
            n_principle=4,
            pretraining_epochs=2000,
            pretrain_lr=0.02,
            training_epochs=10000,
            finetune_lr=0.008,
            hidden_layers_sizes=[310, 100],
            corruption_levels=[0., 0.]):
    """
    This function maps spatial PCs to a deep representation.
    
    Parameters:
    datasets:           A list containing 3 tuples. Each tuple have 2 entries, 
                        which are theano.shared variables. They stands for train,
                        valid, test data.
    batch_size:         Batch size.
    pretraining_epochs: Pretraining epoches.
    pretrain_lr:        Pretraining learning rate.
    training_epochs:    Fine-tuning epoches.
    finetune_lr:        Fine-tuning learning rate.
    hidden_layers_sizes:A list containing integers. Each intger specifies a size
                        of a hidden layer.
    corruption_levels:  A list containing floats in the inteval [0, 1]. Each 
                        number specifies the corruption level of its corresponding
                        hidden layer.

    Return:
    spatial_rep:        2-D numpy.array. Deep representation for each spatial sample.
    test_score:         Accuracy this representations yield on the trained SdA.
    """

    print 'finetuning learning rate=', finetune_lr
    print 'pretraining learning rate=', pretrain_lr
    print 'pretraining epoches=', pretraining_epochs
    print 'fine tuning epoches=', training_epochs
    print 'batch size=', batch_size
    print 'hidden layers sizes=', hidden_layers_sizes
    print 'corruption levels=', corruption_levels

    # compute number of minibatches for training, validation and testing
    n_train_batches = datasets[0][0].get_value(borrow=True).shape[0]
    n_train_batches /= batch_size

    # numpy random generator
    numpy_rng = numpy.random.RandomState(89677)
    print '... building the model'
    # construct the stacked denoising autoencoder class
    sda = SdA(numpy_rng=numpy_rng,
              n_ins=datasets[0][0].get_value(borrow=True).shape[1],
              hidden_layers_sizes=hidden_layers_sizes,
              n_outs=gnd_img.max())

    ################################################################################
    # PRETRAINING THE MODEL #
    #########################

    print '... getting the pretraining functions'
    pretraining_fns = sda.pretraining_functions(train_set_x=datasets[0][0],
                                                batch_size=batch_size)

    print '... pre-training the model'
    start_time = time.clock()
    ## Pre-train layer-wise
    for i in xrange(sda.n_layers):
        # go through pretraining epochs
        for epoch in xrange(pretraining_epochs):
            # go through the training set
            c = []
            for batch_index in xrange(n_train_batches):
                c.append(pretraining_fns[i](index=batch_index,
                                            corruption=corruption_levels[i],
                                            lr=pretrain_lr))

            if epoch % 100 == 0:
                print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch),
                print numpy.mean(c)

    end_time = time.clock()

    print >> sys.stderr, ('The pretraining code for file ' +
                          os.path.split(__file__)[1] + ' ran for %.2fm' %
                          ((end_time - start_time) / 60.))

    ################################################################################
    # FINETUNING THE MODEL #
    ########################

    # get the training, validation and testing function for the model
    print '... getting the finetuning functions'
    train_fn, validate_model, test_model = sda.build_finetune_functions(
        datasets=datasets, batch_size=batch_size, learning_rate=finetune_lr)

    print '... finetunning the model'
    # early-stopping parameters
    patience = 100 * n_train_batches  # look as this many examples regardless
    patience_increase = 2.  # wait this much longer when a new best is
    # found
    improvement_threshold = 0.995  # a relative improvement of this much is
    # considered significant
    validation_frequency = min(10 * n_train_batches, patience / 2)
    # go through this many
    # minibatche before checking the network
    # on the validation set; in this case we
    # check every epoch

    best_params = None
    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0

    while (epoch < training_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            minibatch_avg_cost = train_fn(minibatch_index)
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                validation_losses = validate_model()
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if (this_validation_loss <
                            best_validation_loss * improvement_threshold):
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = test_model()
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

    end_time = time.clock()
    print(('Optimization complete with best validation score of %f %%,'
           'with test performance %f %%') %
          (best_validation_loss * 100., test_score * 100.))
    print >> sys.stdout, ('The training code for file ' +
                          os.path.split(__file__)[1] + ' ran for %.2fm' %
                          ((end_time - start_time) / 60.))

    # keep the following line consistent with line 227, function "prepare_data"
    filename = 'ksc_l1sda_pt%d_ft%d_lrp%.4f_f%.4f_bs%d_pca%d_ws%d' % \
                (pretraining_epochs, training_epochs, pretrain_lr, finetune_lr,
                 batch_size, n_principle, window_size)

    print '... classifying test set with learnt model:'
    pred_func = theano.function(inputs=[sda.x], outputs=sda.logLayer.y_pred)
    pred_test = pred_func(datasets[2][0].get_value(borrow=True))
    true_test = datasets[2][1].get_value(borrow=True)
    true_valid = datasets[1][1].get_value(borrow=True)
    true_train = datasets[0][1].get_value(borrow=True)
    result_analysis(pred_test, true_train, true_valid, true_test)

    print '... classifying the whole image with learnt model:'
    print '...... extracting data'
    data_spectral, data_spatial, _, _ = \
        T_pca_constructor(hsi_img=img, gnd_img=gnd_img, n_principle=n_principle,
                          window_size=window_size, flag='unsupervised',
                          merge=True)

    start_time = time.clock()
    print '...... begin '
    y = pred_func(data_spectral) + 1
    print '...... done '
    end_time = time.clock()
    print 'finished, running time:%fs' % (end_time - start_time)

    y_rgb = cmap[y, :]
    margin = (window_size / 2) * 2  # floor it to a multiple of 2
    y_image = y_rgb.reshape(width - margin, height - margin, 3)
    scipy.misc.imsave(filename + 'wholeimg.png', y_image)
    print 'Saving classification results'
    sio.savemat(filename + 'wholeimg.mat',
                {'y': y.reshape(width - margin, height - margin)})

    ############################################################################
    print '... performing Student\'s t-test'
    best_c = 10000.
    best_g = 10.
    svm_classifier = svm.SVC(C=best_c, gamma=best_g, kernel='rbf')
    svm_classifier.fit(datasets[0][0].get_value(), datasets[0][1].get_value())

    data = [
        numpy.vstack((datasets[1][0].get_value(), datasets[2][0].get_value())),
        numpy.hstack((datasets[1][1].get_value(), datasets[2][1].get_value()))
    ]
    numpy_rng = numpy.random.RandomState(89677)
    num_test = 100
    print 'Total number of tests: %d' % num_test
    k_sae = []
    k_svm = []
    for i in xrange(num_test):
        [_, _], [_, _], [test_x, test_y], _ = \
        train_valid_test(data, ratio=[0, 1, 1], batch_size=1,
                         random_state=numpy_rng.random_integers(1e10))
        test_y = test_y + 1  # fix the label scale problem
        pred_y = pred_func(test_x)
        cm = confusion_matrix(test_y, pred_y)
        pr_a = cm.trace() * 1.0 / test_y.size
        pr_e = ((cm.sum(axis=0)*1.0/test_y.size) * \
                (cm.sum(axis=1)*1.0/test_y.size)).sum()
        k_sae.append((pr_a - pr_e) / (1 - pr_e))

        pred_y = svm_classifier.predict(test_x)
        cm = confusion_matrix(test_y, pred_y)
        pr_a = cm.trace() * 1.0 / test_y.size
        pr_e = ((cm.sum(axis=0)*1.0/test_y.size) * \
                (cm.sum(axis=1)*1.0/test_y.size)).sum()
        k_svm.append((pr_a - pr_e) / (1 - pr_e))

    std_k_sae = numpy.std(k_sae)
    std_k_svm = numpy.std(k_svm)
    mean_k_sae = numpy.mean(k_sae)
    mean_k_svm = numpy.mean(k_svm)
    left =    ( (mean_k_sae - mean_k_svm) * numpy.sqrt(num_test*2-2)) \
            / ( numpy.sqrt(2./num_test) * num_test * (std_k_sae**2 + std_k_svm**2) )

    rv = t(num_test * 2.0 - 2)
    right = rv.ppf(0.95)

    print '\tstd\t\tmean'
    print 'k_sae\t%f\t%f' % (std_k_sae, mean_k_sae)
    print 'k_svm\t%f\t%f' % (std_k_svm, mean_k_svm)
    if left > right:
        print 'left = %f, right = %f, test PASSED.' % (left, right)
    else:
        print 'left = %f, right = %f, test FAILED.' % (left, right)

    return test_score
Beispiel #7
0
print 'pretraining epoches=', pretraining_epochs
print 'fine tuning epoches=', training_epochs
print 'batch size=', batch_size
print 'hidden layers sizes=', hidden_layers_sizes
print 'corruption levels=', corruption_levels

# compute number of minibatches for training, validation and testing
n_train_batches = datasets[0][0].get_value(borrow=True).shape[0]
n_train_batches /= batch_size

# numpy random generator
numpy_rng = numpy.random.RandomState(89677)
print '... building the model'
# construct the stacked denoising autoencoder class
sda = SdA(numpy_rng=numpy_rng, n_ins=bands,
          hidden_layers_sizes=hidden_layers_sizes,
          n_outs=gnd_img.max())
             
                           #########################
                           # PRETRAINING THE MODEL #
                           #########################
print '... getting the pretraining functions'
pretraining_fns = sda.pretraining_functions(train_set_x=datasets[0][0],
                                            batch_size=batch_size)

print '... pre-training the model'
start_time = time.clock()
## Pre-train layer-wise
for i in xrange(sda.n_layers):
    # go through pretraining epochs
    for epoch in xrange(pretraining_epochs):
print 'pretraining epoches=', pretraining_epochs
print 'fine tuning epoches=', training_epochs
print 'batch size=', batch_size
print 'hidden layers sizes=', hidden_layers_sizes
print 'corruption levels=', corruption_levels

# compute number of minibatches for training, validation and testing
n_train_batches = datasets[0][0].get_value(borrow=True).shape[0]
n_train_batches /= batch_size

# numpy random generator
numpy_rng = numpy.random.RandomState(89677)
print '... building the model'
# construct the stacked denoising autoencoder class
sda = SdA(numpy_rng=numpy_rng,
          n_ins=datasets[0][0].get_value(borrow=True).shape[1],
          hidden_layers_sizes=hidden_layers_sizes,
          n_outs=gnd_img.max())

################################################################################
# PRETRAINING THE MODEL #
#########################

print '... getting the pretraining functions'
pretraining_fns = sda.pretraining_functions(train_set_x=datasets[0][0],
                                            batch_size=batch_size)

print '... pre-training the model'
start_time = time.clock()
## Pre-train layer-wise
for i in xrange(sda.n_layers):
    # go through pretraining epochs
    train_set_x = theano.shared(numpy.asarray(numpy.vstack(cute_data),
                                               dtype=theano.config.floatX),
                                 borrow=True)

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size

    # numpy random generator
    # start-snippet-3
    numpy_rng = numpy.random.RandomState(89677)
    print('... building the model')
    # construct the stacked denoising autoencoder class
    sda = SdA(
        numpy_rng=numpy_rng,
        n_ins=data.shape[1],
        hidden_layers_sizes=[1000, 500, 20, data.shape[1]],
        n_outs=10
    )
    # end-snippet-3 start-snippet-4

    pretraining_fns = sda.pretraining_functions(train_set_x=train_set_x,
                                                batch_size=batch_size)
    print('... pre-training the model')
    start_time = timeit.default_timer()
    ## Pre-train layer-wise
    corruption_levels = [.1, .1, .1, .1]
    for i in range(sda.n_layers):
        # go through pretraining epochs
        for epoch in range(pretraining_epochs):
            # go through the training set
            c = []
Beispiel #10
0
def main():
    # setup output directory
    d = datetime.datetime.today()
    output_folder = "out/{}-{}-{}_{}:{}:{}".format(d.year, d.month, d.day,
                                                   d.hour, d.minute, d.second)
    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)
    os.chdir(output_folder)

    # load dataset
    datasets = load_data()

    train_set_x, train_set_y = util.shared_dataset(datasets[0])
    valid_set_x, valid_set_y = util.shared_dataset(datasets[1])
    test_set_x, test_set_y = util.shared_dataset(datasets[2])

    train_set = (train_set_x, train_set_y)
    valid_set = (valid_set_x, valid_set_y)
    test_set = (test_set_x, test_set_y)

    n_input = train_set_x.get_value(borrow=True).shape[1]
    n_output = train_set_y.get_value(borrow=True).shape[1]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size

    # numpy random generator
    # start-snippet-3
    numpy_rng = numpy.random.RandomState(89677)
    print '... building the model'
    # construct the stacked denoising autoencoder class
    sda = SdA(numpy_rng=numpy_rng,
              n_ins=n_input,
              hidden_layers_sizes=[1000, 1000, 1000],
              n_outs=n_output)

    predict_fn = sda.build_predict_function()

    #########################
    # PRETRAINING THE MODEL #
    #########################
    print '... getting the pretraining functions'
    pretraining_fns = sda.pretraining_functions(train_set_x=train_set_x,
                                                batch_size=batch_size)

    print '... pre-training the model'
    start_time = time.clock()
    ## Pre-train layer-wise
    corruption_levels = [.1, .2, .3]
    for i in xrange(sda.n_layers):
        # go through pretraining epochs
        for epoch in xrange(pretraining_epochs):
            # go through the training set
            c = []
            for batch_index in xrange(n_train_batches):
                c.append(pretraining_fns[i](index=batch_index,
                                            corruption=corruption_levels[i],
                                            lr=pretrain_lr))
            print("Pre-training layer {}, epoch {}, cost ".format(i, epoch)),
            print("{}".format(numpy.mean(c)))

    end_time = time.clock()

    print >> sys.stderr, ('The pretraining code for file ' +
                          os.path.split(__file__)[1] + ' ran for %.2fm' %
                          ((end_time - start_time) / 60.))

    ########################
    # FINETUNING THE MODEL #
    ########################

    # get the training, validation and testing function for the model
    print '... getting the finetuning functions'
    train_fn, validate_model, test_model = sda.build_finetune_functions(
        datasets=(train_set, valid_set, test_set),
        batch_size=batch_size,
        learning_rate=finetune_lr)

    print '... finetunning the model'
    # early-stopping parameters
    patience = 10 * n_train_batches  # look as this many examples regardless
    patience_increase = 2.  # wait this much longer when a new best is
    # found
    improvement_threshold = 0.995  # a relative improvement of this much is
    # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
    # go through this many
    # minibatche before checking the network
    # on the validation set; in this case we
    # check every epoch

    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0

    while (epoch < training_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            minibatch_avg_cost = train_fn(minibatch_index)
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                validation_losses = validate_model()
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if (this_validation_loss <
                            best_validation_loss * improvement_threshold):
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = test_model()
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print(('Optimization complete with best validation score of %f %%, '
           'on iteration %i, '
           'with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The training code for file ' +
                          os.path.split(__file__)[1] + ' ran for %.2fm' %
                          ((end_time - start_time) / 60.))

    ###########
    # PREDICT #
    ###########
    y_pred = predict_fn(test_set_x.get_value(borrow=True))
    mae, mre = util.calculate_error_indexes(test_set_y, y_pred)
    print("-*-*RESULT*-*-")
    print("mae={}".format(mae))
    print("mre={}".format(mre))

    # plot
    for i in xrange(n_output):
        filename = "{}.png".format(str(i))
        plot.savefig(filename, test_set_x, y_pred, indexes=[i])
Beispiel #11
0
def main():
    # setup output directory
    d = datetime.datetime.today()
    output_folder = "out/{}-{}-{}_{}:{}:{}".format(d.year, d.month, d.day, d.hour, d.minute, d.second)
    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)
    os.chdir(output_folder)

    # load dataset
    datasets = load_data()

    train_set_x, train_set_y = util.shared_dataset(datasets[0])
    valid_set_x, valid_set_y = util.shared_dataset(datasets[1])
    test_set_x, test_set_y = util.shared_dataset(datasets[2])

    train_set = (train_set_x, train_set_y)
    valid_set = (valid_set_x, valid_set_y)
    test_set = (test_set_x, test_set_y)

    n_input = train_set_x.get_value(borrow=True).shape[1]
    n_output = train_set_y.get_value(borrow=True).shape[1]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size

    # numpy random generator
    # start-snippet-3
    numpy_rng = numpy.random.RandomState(89677)
    print '... building the model'
    # construct the stacked denoising autoencoder class
    sda = SdA(
        numpy_rng=numpy_rng,
        n_ins=n_input,
        hidden_layers_sizes=[1000, 1000, 1000],
        n_outs=n_output
    )

    predict_fn = sda.build_predict_function()

    #########################
    # PRETRAINING THE MODEL #
    #########################
    print '... getting the pretraining functions'
    pretraining_fns = sda.pretraining_functions(train_set_x=train_set_x, batch_size=batch_size)

    print '... pre-training the model'
    start_time = time.clock()
    ## Pre-train layer-wise
    corruption_levels = [.1, .2, .3]
    for i in xrange(sda.n_layers):
        # go through pretraining epochs
        for epoch in xrange(pretraining_epochs):
            # go through the training set
            c = []
            for batch_index in xrange(n_train_batches):
                c.append(pretraining_fns[i](index=batch_index,
                                            corruption=corruption_levels[i],
                                            lr=pretrain_lr))
            print("Pre-training layer {}, epoch {}, cost ".format(i, epoch)),
            print("{}".format(numpy.mean(c)))

    end_time = time.clock()

    print >> sys.stderr, ('The pretraining code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.))


    ########################
    # FINETUNING THE MODEL #
    ########################

    # get the training, validation and testing function for the model
    print '... getting the finetuning functions'
    train_fn, validate_model, test_model = sda.build_finetune_functions(
        datasets=(train_set, valid_set, test_set),
        batch_size=batch_size,
        learning_rate=finetune_lr
    )

    print '... finetunning the model'
    # early-stopping parameters
    patience = 10 * n_train_batches  # look as this many examples regardless
    patience_increase = 2.  # wait this much longer when a new best is
    # found
    improvement_threshold = 0.995  # a relative improvement of this much is
    # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
    # go through this many
    # minibatche before checking the network
    # on the validation set; in this case we
    # check every epoch

    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0

    while (epoch < training_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            minibatch_avg_cost = train_fn(minibatch_index)
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                validation_losses = validate_model()
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if (
                                this_validation_loss < best_validation_loss *
                                improvement_threshold
                    ):
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = test_model()
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print(
        (
            'Optimization complete with best validation score of %f %%, '
            'on iteration %i, '
            'with test performance %f %%'
        )
        % (best_validation_loss * 100., best_iter + 1, test_score * 100.)
    )
    print >> sys.stderr, ('The training code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))

    ###########
    # PREDICT #
    ###########
    y_pred = predict_fn(test_set_x.get_value(borrow=True))
    mae, mre = util.calculate_error_indexes(test_set_y, y_pred)
    print("-*-*RESULT*-*-")
    print("mae={}".format(mae))
    print("mre={}".format(mre))

    # plot
    for i in xrange(n_output):
        filename = "{}.png".format(str(i))
        plot.savefig(filename, test_set_x, y_pred, indexes=[i])
Beispiel #12
0
def build_model(trainval_set, options):
    """Return ``(sda, pretraining_fns)`` for a fresh or a reused SdA.

    :param trainval_set: ``(train_set_x, train_set_y)`` pair; only unpacked
        when a new model is built.
    :param options: dict of settings.  ``options['retrain'] == 0`` selects
        the build-from-scratch path (reads ``verbose``, ``batchsize``,
        ``numpy_rng``, ``theano_rng``, ``ndim``, ``hlayers``, ``nclasses``);
        any other value reuses ``options['sda_reuse_model']`` (reads
        ``nclasses_source`` and ``nclasses``).
    :return: tuple ``(sda, pretraining_fns)``; ``pretraining_fns`` is
        ``None`` on the reuse path.
    """
    build_from_scratch = options['retrain'] == 0

    if not build_from_scratch:
        # Take the values of all current parameters first, and only then
        # write them into the ``params_b`` entries at the same positions.
        model = options['sda_reuse_model']
        snapshot = [shared.get_value() for shared in model.params]
        for position, value in enumerate(snapshot):
            model.params_b[position].set_value(value)

        # First dimension of the second-to-last ``params_b`` entry; passed
        # on as the first argument of ``change_lastlayer``.
        last_in = model.params_b[-2].get_value().shape[0]
        if options['nclasses_source'] != options['nclasses']:
            # Class counts differ between source and target: swap the
            # last layer for one sized for the new number of classes.
            print >> sys.stderr, ("Droping logistic layer...")
            model.change_lastlayer(last_in, options['nclasses'])

        # No layer-wise pre-training functions on the reuse path.
        return (model, None)

    if options['verbose'] > 4:
        print >> sys.stderr, ('... building the model')

    train_set_x, _train_set_y = trainval_set

    # The batch count is not used below; it is still computed so that the
    # same lookups and division are performed as before.
    n_rows = train_set_x.get_value(borrow=True).shape[0]
    _n_train_batches = n_rows / options['batchsize']

    # construct the stacked denoising autoencoder class
    model = SdA(
        numpy_rng=options['numpy_rng'],
        theano_rng=options['theano_rng'],
        n_ins=options['ndim'],
        hidden_layers_sizes=options['hlayers'],
        n_outs=options['nclasses'],
        n_outs_b=options['nclasses'],
        tau=None,
    )

    if options['verbose'] > 4:
        print >> sys.stderr, ('... getting the pretraining functions')
    layer_fns = model.pretraining_functions(
        train_set_x=train_set_x,
        batch_size=options['batchsize'],
        tau=None,
    )

    return (model, layer_fns)
Beispiel #13
0
def pretrain_SdA(pretraining_epochs=50, pretrain_lr=0.001, batch_size=100):
    """
    
    Pretrain an SdA model for the given number of training epochs.  The model is either initialized from scratch, or 
    is reconstructed from a previously pickled model.

    :type pretraining_epochs: int
    :param pretraining_epochs: number of epoch to do pretraining

    :type pretrain_lr: float
    :param pretrain_lr: learning rate to be used during pre-training

    :type batch_size: int
    :param batch_size: train in mini-batches of this size

    """
    
    current_dir = os.getcwd()    

    os.chdir(options.dir)
    today = datetime.today()
    day = str(today.date())
    hour = str(today.time())
    output_filename = "stacked_denoising_autoencoder_" + options.arch + "." + day + "." + hour
    output_file = open(output_filename,'w')
    os.chdir(current_dir)    
    print >> output_file, "Run on " + str(datetime.now())    
    
    # Get the training data sample from the input file
    data_set_file = openFile(str(options.inputfile), mode = 'r')
    datafiles = extract_unlabeled_chunkrange(data_set_file, num_files = 25, offset = options.offset)
    train_set_x = load_data_unlabeled(datafiles)
    data_set_file.close()

    # compute number of minibatches for training, validation and testing
    n_train_batches, n_features = train_set_x.get_value(borrow=True).shape
    n_train_batches /= batch_size
    
    # numpy random generator
    numpy_rng = numpy.random.RandomState(89677)
    
    
    # Check if we can restore from a previously trained model,    
    # otherwise construct a new SdA
    if options.restorefile is not None:
        print >> output_file, 'Unpickling the model from %s ...' % (options.restorefile)
        current_dir = os.getcwd()    
        os.chdir(options.dir)         
        f = file(options.restorefile, 'rb')
        sda_model = cPickle.load(f)
        f.close()        
        os.chdir(current_dir)
    else:
        print '... building the model'
        arch_list_str = options.arch.split("-")
        arch_list = [int(item) for item in arch_list_str]
        corruption_list = [options.corruption for i in arch_list]
        sda_model = SdA(numpy_rng=numpy_rng, n_ins=n_features,
              hidden_layers_sizes=arch_list,
              corruption_levels = corruption_list,
              n_outs=-1)

    #########################
    # PRETRAINING THE MODEL #
    #########################
    print '... getting the pretraining functions'
    pretraining_fns = sda_model.pretraining_functions(train_set_x=train_set_x,
                                                batch_size=batch_size)

    print '... pre-training the model'
    start_time = time.clock()
    
    ## Pre-train layer-wise
    corruption_levels = sda_model.corruption_levels
    learning_rates = [pretrain_lr * 10. for i in arch_list]
    learning_rates[0] = pretrain_lr    
    
    for i in xrange(sda_model.n_layers):
                       
        for epoch in xrange(pretraining_epochs):
            # go through the training set
            c = []
            for batch_index in xrange(n_train_batches):
                c.append(pretraining_fns[i](index=batch_index,
                         corruption=corruption_levels[i],
                         lr=learning_rates[i],
                         momentum=options.momentum,
                         weight_decay=options.weight_decay))
            print >> output_file, 'Pre-training layer %i, epoch %d, cost ' % (i, epoch),
            print >> output_file, numpy.mean(c)
            
        if options.savefile is not None:
            print >> output_file, 'Pickling the model...'
            current_dir = os.getcwd()    
            os.chdir(options.dir)            
            f = file(options.savefile, 'wb')
            cPickle.dump(sda_model, f, protocol=cPickle.HIGHEST_PROTOCOL)
            f.close()
            os.chdir(current_dir)

    end_time = time.clock()

    print >> output_file, ('The pretraining code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
    output_file.close()
def test_mom_wd(filename, num_epochs=10, momentum=0., weight_decay=0., pretrain_lr=0.001, batch_size=10):
    """
    
    Pretrain an SdA model using momentum, weight-decay, or both
    for the given number of training epochs.  The model is initialized from scratch.

    :type filename: string
    :param filename: the prefix for the name of the file capturing the output of this test

    :type num_epochs: int
    :param num_epochs: number of epoch to do pretraining

    :type momentum: float
    :param momentum: momentum rate for updating parameters when pre-training
    
    :type weight_decay: float
    :param weight_decay: multiplicative factor for degrading the size of updates to weights
    effectively penalizing larger weights.

    :type pretrain_lr: float
    :param pretrain_lr: learning rate to be used during pre-training

    :type batch_size: int
    :param batch_size: train in mini-batches of this size

    """
    
    current_dir = os.getcwd()    

    os.chdir(options.dir)
    today = datetime.today()
    day = str(today.date())
    hour = str(today.time())
    output_filename = filename + "_sda_pretrain." + day + "." + hour
    output_file = open(output_filename,'w')
    os.chdir(current_dir)    
    print >> output_file, "Run on " + str(datetime.now())    
    
    # Get the training data sample from the input file
    data_set_file = openFile(str(options.inputfile), mode = 'r')
    datafiles = extract_unlabeled_chunkrange(data_set_file, num_files = 10)
    train_set_x = load_data_unlabeled(datafiles)
    data_set_file.close()

    # compute number of minibatches for training, validation and testing
    n_train_batches, n_features = train_set_x.get_value(borrow=True).shape
    n_train_batches /= batch_size
    
    # numpy random generator
    numpy_rng = numpy.random.RandomState(89677)
    print '... building the model'

    sda_model = SdA(numpy_rng=numpy_rng, n_ins=n_features,
              hidden_layers_sizes=[700, 700, 300, 50],
              corruption_levels = [0.2,0.2,0.2,0.2],
              n_outs=3, dA_losses=['squared','xent','xent','xent'])

    #########################
    # PRETRAINING THE MODEL #
    #########################
    print '... getting the pretraining functions'
    pretraining_fns = sda_model.pretraining_functions(train_set_x=train_set_x,
                                                batch_size=batch_size)

    print '... pre-training the model'
    start_time = time.clock()
    ## Pre-train layer-wise
    corruption_levels = [float(options.corruption), float(options.corruption), float(options.corruption), float(options.corruption)]
    for i in xrange(sda_model.n_layers):
        
        for epoch in xrange(num_epochs):
            # go through the training set
            c = []
            for batch_index in xrange(n_train_batches):
                c.append(pretraining_fns[i](index=batch_index,
                         corruption=corruption_levels[i],
                         lr=pretrain_lr,
                         momentum=momentum,
                         weight_decay=weight_decay))
            print >> output_file, 'Pre-training layer %i, epoch %d, cost ' % (i, epoch),
            print >> output_file, numpy.mean(c)

    end_time = time.clock()

    print >> output_file, ('Pretraining time for file ' +
                          os.path.split(__file__)[1] +
                          ' was %.2fm to go through %i epochs' % (((end_time - start_time) / 60.), (num_epochs / 2)))
  
    
    output_file.close()   
Beispiel #15
0
    w_test *= 1./split_level
    w_train *= 1./(1. - split_level)
   

    ###################### Build Model ########################
    
    # compute number of minibatches for training, validation and testing
    n_train_batches = X.shape[0]
    n_train_batches /= batch_size

    # np random generator
    np_rng = np.random.RandomState(89677)
    print '... building the model'
    # construct the stacked denoising autoencoder class
    sda = SdA(numpy_rng=np_rng, n_ins=39,
              hidden_layers_sizes = hidden_layers_sizes,
              n_outs=2)

    if load_pretrain:  
        f = open(pre_name, "rb")
        sda.load(f)
        f.close()
    else:
        #########################
        # PRETRAINING THE MODEL #
        #########################

        print '... getting the pretraining functions'
        pretraining_fns = sda.pretraining_functions(train_set_x=theano.shared(X),
                                                batch_size=batch_size)
Beispiel #16
0
def run_classification(pretrain_lr=0.001, # SdA and DBN
                       learning_rate=0.01, 
                       L1_reg=0.001,
                       L2_reg=0.0001,
                       pretraining_epochs=3, # SdA and DBN
                       n_epochs=5,
                       batch_size=64,
                       display_step=1000,
                       dataset='mnist.pkl.gz',
                       n_in=28*28, # mnist image shape
                       input_shape=(-1,1,28,28), # CNN and LeNet5, this is MNIST dimensions
                       n_out=10, # number of MNIST classes
                       n_hidden=1000, # (1-layer) MLP
                       hidden_layers_sizes=[500,500,500],
                       CNN_filter_size=20, # CNN
                       LeNet5_filter_sizes=[50,20], # LeNet5
                       corruption_levels=[0.1,0.2,0.3], # SdA
                       k=1, # DBN
                       # model_name can be the name of a model to create,
                       # or file path to load a saved file
                       model_name='LogisticRegression',
                       best_model_file_path='best_model.pkl'
):
    """
    Demonstrate stochastic gradient descent optimization for a multilayer
    perceptron

    This is demonstrated on MNIST.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see
    regularization)

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see
    regularization)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: the path of the MNIST dataset file from
                 http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz


   """

    ######################
    # Instance Variables #
    ######################
    # instance variables to be used in some of the models below
    numpy_rng = np.random.RandomState(1234)

    #############
    # Load Data #
    #############
    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    val_set_x, val_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    ###################################
    # Calculate number of Minibatches #
    ###################################
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_val_batches = val_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size


    ############################################
    # allocate symbolic variables for the data #
    ############################################
    index = T.lscalar()  # index to a [mini]batch

    # generate symbolic variables for input (x and y represent a minibatch)
    x = T.matrix('x')  # data, presented as rasterized images
    y = T.ivector('y')  # labels, presented as 1D vector of [int] labels

    ###############
    # BUILD MODEL #
    ###############
    print('... building the model')
    model=None

    if model_name == 'LogisticRegression':
        model = LogisticRegression(
            input=x,
            n_in=n_in,
            n_out=n_out
        )
    elif model_name == 'MLP':
        model = MLP(
            numpy_rng=numpy_rng,
            input=x,
            n_in=n_in,
            n_hidden=n_hidden,
            n_out=n_out
        )
    elif model_name == 'DeepMLP':
        model = DeepMLP(
            numpy_rng=numpy_rng,
            input=x,
            n_in=n_in,
            hidden_layers_sizes=hidden_layers_sizes,
            n_out=n_out
        )
    elif model_name == 'CNN':
        model = CNN(
                numpy_rng=numpy_rng,
                input=x,
                input_shape=input_shape,
                filter_sizes=[CNN_filter_size],
                n_out=n_out,
                batch_size=batch_size
            )
    elif model_name == 'LeNet5':
        model = LeNet5(
            numpy_rng=numpy_rng,
            input=x,
            input_shape=input_shape,
            filter_sizes=LeNet5_filter_sizes,
            n_out=n_out,
            batch_size=batch_size
        )
    elif model_name == 'SdA':
        model = SdA(
            numpy_rng=numpy_rng,
            input=x,
            n_in=n_in,
            hidden_layers_sizes=hidden_layers_sizes,
            n_out=n_out
        )
    elif model_name == 'DBN':
        model = DBN(
            numpy_rng=numpy_rng, 
            input=x,
            n_in=n_in,
            hidden_layers_sizes=hidden_layers_sizes,
            n_out=n_out
        )
    # Assume the model_name is a path
    elif model_name != None:
        try:
            model = pickle.load(open(model_name))
        except:
            raise "Error! Model file path not valid."
    else:
        raise "Error! No model selected."

    #########################################
    # PRETRAINING THE MODEL (SdA, DBN Only) #
    #########################################
    if (model_name == 'SdA') or (model_name == 'DBN'):
        print('... starting pretraining')

        #########################
        # PreTraining Functions #
        #########################
        print('... getting the pretraining functions')

        if model_name == 'SdA':
            pretraining_fns = model.pretraining_functions(
                                x=x, # I had to move x here, instead of in the model, or there was an error.
                                train_set_x=train_set_x,
                                batch_size=batch_size) 
        
        elif model_name == 'DBN':
            pretraining_fns = model.pretraining_functions(
                                x=x, # I had to move x here, instead of in the model, or there was an error.
                                train_set_x=train_set_x,
                                batch_size=batch_size,
                                k=k)

        ##################
        # PRETRAIN MODEL #
        ##################
        print('... pre-training the model')
        start_time = timeit.default_timer()

        if model_name == 'SdA':
            corruption_levels = [.1, .2, .3]
        ## Pre-train layer-wise
        for i in range(model.n_layers):
            # go through pretraining epochs
            for epoch in range(pretraining_epochs):
                # go through the training set
                cost = []
                for batch_index in range(n_train_batches):

                    if model_name == 'SdA':
                        cost.append(
                            pretraining_fns[i](index=batch_index,
                                               corruption=corruption_levels[i],
                                               lr=pretrain_lr)
                        )
                    elif model_name == 'DBN':
                        cost.append(
                            pretraining_fns[i](index=batch_index,
                                               lr=pretrain_lr)
                        )

                print('Pre-training layer %i, epoch %d, cost %f' % 
                      (i, epoch+1, np.mean(cost, dtype='float64'))
                )

        end_time = timeit.default_timer()

        print(('The pretraining code for file ' +
               os.path.split(__file__)[1] +
               ' ran for %.2fm' % ((end_time - start_time) / 60.)), file=sys.stderr)

        print('...End of pre-training')
    
    #####################
    # Training Function #
    #####################
    cost, updates = model.get_cost_updates(
        y=y,
        L1_reg = L1_reg, 
        L2_reg = L2_reg,
        learning_rate=learning_rate
    )

    # compiling a Theano function `train_model` that returns the cost, but
    # in the same time updates the parameter of the model based on the rules
    # defined in `updates`
    
    train_model = theano.function(
        inputs=[index],
        outputs=model.get_latest_cost(),
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        },
        name='train'
    )
    
    ##################################
    # Validation & Testing Functions #
    ##################################

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    validate_model = theano.function(
        inputs=[index],
        outputs=[model.errors(y), model.get_loss(), model.get_L1(), model.get_L2_sqr()],
        givens={
            x: val_set_x[index * batch_size:(index + 1) * batch_size],
            y: val_set_y[index * batch_size:(index + 1) * batch_size]
        },
        name='validate'
    )

    test_model = theano.function(
        inputs=[index],
        outputs=model.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        },
        name='test'
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    # early-stopping parameters
    patience = 10 * n_train_batches # look as this many examples regardless
    patience_increase = 2.  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience // 2)
                                  # go through this many
                                  # minibatche before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_validation_loss = np.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    minibatch_training_costs = []

    # go through training epochs
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        
        for minibatch_index in range(n_train_batches):

            #################
            # Training Step #
            #################
            latest_minibatch_training_cost = train_model(minibatch_index)
            minibatch_training_costs.append(latest_minibatch_training_cost)

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % display_step == 0:
                print('training @ iter = ', iter)

            if (iter + 1) % validation_frequency == 0:

                #################
                # Training Loss #
                #################
                this_training_loss = np.mean(minibatch_training_costs, dtype='float64')

                print('latest average training loss: %f' % (this_training_loss))
                minibatch_training_costs = []

                ###################
                # Validation Loss #
                ###################
                validation_losses = [validate_model(i)[0] for i in range(n_val_batches)]
                this_validation_loss = np.mean(validation_losses, dtype='float64')

                print('epoch %i, minibatch %i/%i, validation error %f %%' % 
                        (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.)
                )

                ########################
                # Validation Sublosses #
                ########################
                # Latest sublosses for our models include: unregularized loss, L1_norm, L2_norm
                unregularized_losses = [validate_model(i)[1] for i in range(n_val_batches)]
                this_unregularized_loss = np.mean(unregularized_losses, dtype='float64')
                L1_losses = [validate_model(i)[2] for i in range(n_val_batches)]
                this_L1_loss = np.mean(L1_losses, dtype='float64')
                L2_sqr_losses = [validate_model(i)[3] for i in range(n_val_batches)]
                this_L2_sqr_loss = np.mean(L2_sqr_losses, dtype='float64')
                print('latest total validation loss: %f' % (this_unregularized_loss + this_L1_loss + this_L2_sqr_loss) )
                print('latest unregularized loss: %f' % (this_unregularized_loss) )
                print('latest L1_norm: %f' % (this_L1_loss) )
                print('latest L2_norm: %f' % (this_L2_sqr_loss) )

                ###################
                # Save Best Model #
                ###################
                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if this_validation_loss < (best_validation_loss * improvement_threshold):
                        patience = max(patience, iter * patience_increase)

                    ###################
                    # Test Best Model #
                    ###################
                    test_losses = [test_model(i) for i in range(n_test_batches)]
                    test_score = np.mean(test_losses, dtype='float64')

                    print(('     epoch %i, minibatch %i/%i, test error of best model %f %%') % 
                            (epoch, minibatch_index + 1, n_train_batches, test_score * 100.)
                    )

                    ###################
                    # Sav Best Model #
                    ###################
                    with open(best_model_file_path, 'wb') as f:
                        pickle.dump(model, f)

            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print('The code run for %d epochs, with %f epochs/sec' % (
        epoch, 1. * epoch / (end_time - start_time)))
    print(('The code for file ' +
           os.path.split(__file__)[1] +
           ' ran for %.2fm' % ((end_time - start_time) / 60.)), file=sys.stderr)
def pretrain(shared_args, private_args): 
    """ Pretrain an SdA model for the given number of training epochs.  The model is either initialized from 
    scratch, or is reconstructed from a previously pickled model.

    :type shared_args: dict
    :param shared_args: dict containing all the arguments common to both models.

    :type private_args: dict
    :param private_args: dict containing all the arguments specific to each model spawned off this first process.
    
    """
    
    # Import sandbox.cuda to bind the specified GPU to this subprocess
    # then import the remaining theano and model modules.
    import theano.sandbox.cuda
    theano.sandbox.cuda.use(private_args['gpu'])
    
    import theano
    import theano.tensor as T
    from theano.tensor.shared_randomstreams import RandomStreams
    from SdA import SdA    
    
    shared_args_dict = shared_args[0]
    
    current_dir = os.getcwd()    
    
    os.chdir(shared_args_dict['dir'])
    today = datetime.today()
    day = str(today.date())
    hour = str(today.time())
    arch_list = get_arch_list(private_args)            
    corruption_list = [shared_args_dict['corruption'] for i in arch_list]
    layer_types = parse_layer_type(shared_args_dict['layertype'], len(arch_list))    
    
    output_filename = "hybrid_pretraining_sda_" + "_".join(elem for elem in layer_types) + private_args['arch'] + "." + day + "." + hour
    output_file = open(output_filename,'w')
    os.chdir(current_dir)    
    print >> output_file, "Run on " + str(datetime.now())    
    
    # Get the training data sample from the input file
    data_set_file = openFile(str(shared_args_dict['input']), mode = 'r')
    datafiles = extract_unlabeled_chunkrange(data_set_file, num_files = 30, offset = shared_args_dict['offset'])
    if datafiles is None:
            print("No data was returned, exiting.")
            data_set_file.close()
            output_file.close()
            return    
    
    train_set_x = load_data_unlabeled(datafiles)

    # DEBUG: get validation set too
    validation_datafiles = extract_unlabeled_chunkrange(data_set_file, num_files = 5, offset = shared_args_dict['offset'] + 30)
    valid_set_x = load_data_unlabeled(validation_datafiles)      
    data_set_file.close()

    # compute number of minibatches for training, validation and testing
    n_train_batches, n_features = train_set_x.get_value(borrow=True).shape
    n_train_batches /= shared_args_dict['batch_size']
    
    # numpy random generator
    numpy_rng = numpy.random.RandomState(89677)
    
    # Set the initial value of the learning rate
    learning_rate = theano.shared(numpy.asarray(shared_args_dict['pretrain_lr'], 
                                             dtype=theano.config.floatX))     
    
    
    # Check if we can restore from a previously trained model,    
    # otherwise construct a new SdA
    if private_args.has_key('restore'):
        print >> output_file, 'Unpickling the model from %s ...' % (private_args['restore'])
        current_dir = os.getcwd()    
        os.chdir(shared_args_dict['dir'])         
        f = file(private_args['restore'], 'rb')
        sda_model = cPickle.load(f)
        f.close()        
        os.chdir(current_dir)
    else:
        print '... building the model'  
        
        sda_model = SdA(numpy_rng=numpy_rng, n_ins=n_features,
              hidden_layers_sizes=arch_list,
              corruption_levels = corruption_list,
              layer_types=layer_types,
              loss=shared_args_dict['loss'],
              n_outs=-1,
              sparse_init=shared_args_dict['sparse_init'],
              opt_method=shared_args_dict['opt_method'])

    #########################
    # PRETRAINING THE MODEL #
    #########################    
    
    print '... getting the pretraining functions'
    pretraining_fns = sda_model.pretraining_functions(train_set_x=train_set_x,
                                                batch_size=shared_args_dict['batch_size'],
                                                learning_rate=learning_rate,
                                                method='cm')

    print '... getting the hybrid training functions'
    hybrid_pretraining_fns = sda_model.build_finetune_limited_reconstruction(train_set_x=train_set_x, 
                                                                      batch_size=shared_args_dict['batch_size'], 
                                                                      learning_rate=learning_rate,
                                                                      method='cm')
    
    # DEBUG: get full finetuning theano function
    # get the training, validation function for the model
    datasets = (train_set_x,valid_set_x)
        
    print '... getting the finetuning functions'
    finetune_train_fn, validate_model = sda_model.build_finetune_full_reconstruction(
                datasets=datasets, batch_size=shared_args_dict['batch_size'],
                learning_rate=learning_rate,
                method='cm')    

    
    # DEBUG: should only have n_layers - 2 hybrid pretraining functions
    assert len(hybrid_pretraining_fns) == sda_model.n_layers - 2
    
    print '... writing meta-data to output file'
    metadict = {'n_train_batches': n_train_batches}
    metadict = dict(metadict.items() + shared_args_dict.items())
    write_metadata(output_file, metadict)    
    
    print '... pre-training the model'
    start_time = time.clock()
    
    # Get corruption levels from the SdA.  
    corruption_levels = sda_model.corruption_levels
    
    # Function to decrease the learning rate
    decay_learning_rate = theano.function(inputs=[], outputs=learning_rate,
                updates={learning_rate: learning_rate * shared_args_dict['lr_decay']})  
    
    # Function to reset the learning rate
    lr_val = T.scalar('original_lr')
    reset_learning_rate = theano.function(inputs=[lr_val], outputs=learning_rate,
                updates={learning_rate: lr_val})
    
    # Set up functions for max norm regularization
    apply_max_norm_regularization = sda_model.max_norm_regularization()  
    
    for i in xrange(sda_model.n_layers):       
                
        for epoch in xrange(shared_args_dict['pretraining_epochs']):
            # go through the training set
            c = []
            for batch_index in xrange(n_train_batches):
                c.append(pretraining_fns[i](index=batch_index,
                         corruption=corruption_levels[i],momentum=shared_args_dict['momentum']))
                                
            print >> output_file, 'Pre-training layer %i, epoch %d, cost ' % (i, epoch),
            print >> output_file, numpy.mean(c)
            print >> output_file, learning_rate.get_value(borrow=True)
            decay_learning_rate()
            apply_max_norm_regularization(norm_limit=shared_args_dict['maxnorm'])
        
        # Do hybrid pretraining only on the middle layer(s)
        if i > 0 and i < sda_model.n_layers - 1:
            for h_epoch in xrange(20):
                hybrid_c = []
                for batch_index in xrange(n_train_batches):
                    hybrid_c.append(hybrid_pretraining_fns[i-1](index=batch_index,momentum=shared_args_dict['momentum']))  
                print >> output_file, "Hybrid pre-training on layers %i and below, epoch %d, cost" % (i, h_epoch),
                print >> output_file, numpy.mean(hybrid_c)
        
        # Reset the learning rate
        reset_learning_rate(numpy.asarray(shared_args_dict['pretrain_lr'], dtype=numpy.float32))
        
        if private_args.has_key('save'):
            print >> output_file, 'Pickling the model...'
            current_dir = os.getcwd()    
            os.chdir(shared_args_dict['dir'])            
            f = file(private_args['save'], 'wb')
            cPickle.dump(sda_model, f, protocol=cPickle.HIGHEST_PROTOCOL)
            f.close()
            os.chdir(current_dir)

    print '... finetuning with final layer'
    best_validation_loss = numpy.inf
    for f_epoch in xrange(20):
        for minibatch_index in xrange(n_train_batches):
            minibatch_avg_cost = finetune_train_fn(minibatch_index, shared_args_dict['momentum'])
                    
            # DEBUG: monitor the training error
            print >> output_file, ('Fine-tuning epoch %i, minibatch %i/%i, training error %f ' %
                    (f_epoch, minibatch_index + 1, n_train_batches,
                    minibatch_avg_cost))            
    
            # apply max-norm regularization
            apply_max_norm_regularization(shared_args_dict['maxnorm'])          
    
        # validate every epoch               
        validation_losses = validate_model()
        this_validation_loss = numpy.mean(validation_losses)
        
        # save best model that achieved this best loss  
        if this_validation_loss < best_validation_loss:  
            print >> output_file, 'Pickling the model...'  
            current_dir = os.getcwd()    
            os.chdir(shared_args_dict['dir'])            
            f = file(private_args['save'], 'wb')
            cPickle.dump(sda_model, f, protocol=cPickle.HIGHEST_PROTOCOL)
            f.close()
            os.chdir(current_dir)
            
        print >> output_file, ('epoch %i, minibatch %i/%i, validation error %f ' %
              (f_epoch, minibatch_index + 1, n_train_batches,
               this_validation_loss))        

    end_time = time.clock()

    print >> output_file, ('The hybrid training code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
    output_file.close()        
Beispiel #18
0
def test_SdA(sample_size         = 60,
             finetune_lr         = 0.01, 
             pretraining_epochs  = 20,
             pretrain_lr         = 0.01, 
             training_epochs     = 100, 
             batch_size          = 30,
             corruption_levels   = [0.2],
             hidden_layers_sizes = [2000],
             img_size = (1020,1020),
             img_size_test = (600,1020)):
    
    process = Process()
    img_input,img_labels = process.read_in_images(["train-input"],["train-labels"])
    
    img_input = process.normalize(img_input)  
    #img_input = process.apply_clahe(img_input)
    #img_input = process.local_normalization(img_input)  
    
    img_input = img_input[:,:img_size[0],:img_size[1]]
    
    train_set  = img_input
    valid_set  = img_input[:1]
    test_set   = img_input[:1,:img_size_test[0],:img_size_test[1]]
    
    train_set_x,train_set_y = process.manipulate(train_set),train_set
    valid_set_x,valid_set_y = process.manipulate(valid_set),valid_set
    test_set_x,test_set_y   = process.manipulate(test_set),test_set
    
    train_set_x, table   =process.generate_set(train_set_x, sample_size = sample_size, stride = sample_size, img_size = img_size)
    valid_set_x, table   =process.generate_set(valid_set_x, sample_size = sample_size, stride = sample_size, img_size = img_size)
    test_set_x, table    =process.generate_set(test_set_x, sample_size = sample_size, stride = sample_size, img_size = img_size_test)
    train_set_y, table   =process.generate_set(train_set_y, sample_size = sample_size, stride = sample_size, img_size = img_size)
    valid_set_y, table   =process.generate_set(valid_set_y, sample_size = sample_size, stride = sample_size, img_size = img_size)
    test_set_y, table    =process.generate_set(test_set_y, sample_size = sample_size, stride = sample_size, img_size = img_size_test)
    
    train_set_x,train_set_y = train_set_x.astype(np.float32),train_set_y.astype(np.float32)
    valid_set_x,valid_set_y = valid_set_x.astype(np.float32),valid_set_y.astype(np.float32)
    test_set_x,test_set_y   = test_set_x.astype(np.float32), test_set_y.astype(np.float32)
    
    train_set_x, train_set_y = theano.shared(train_set_x,borrow=True),theano.shared(train_set_y,borrow=True)
    valid_set_x, valid_set_y = theano.shared(valid_set_x,borrow=True),theano.shared(valid_set_y,borrow=True)
    test_set_x, test_set_y   = theano.shared(test_set_x,borrow=True),theano.shared(test_set_y,borrow=True)

    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size

    np_rng = np.random.RandomState()
    print '... building the model'

    sda = SdA(
        numpy_rng = np_rng,
        n_ins     = sample_size**2,
        n_outs    = sample_size**2,
        hidden_layers_sizes = hidden_layers_sizes
    )

    print '... Initializing pretraining functions'
    pretraining_fns, output_fn = sda.pretraining_functions(train_set_x=train_set_x,
                                                batch_size=batch_size)
    
    print '... Layer-wise training of model'
    start_time = time.clock()

    for i in xrange(sda.n_layers):
        for epoch in xrange(pretraining_epochs):
            c = []
            for batch_index in xrange(n_train_batches):
                c.append(pretraining_fns[i](index      = batch_index,
                                            corruption = corruption_levels[i],
                                            lr         = pretrain_lr))
            print 'Layer %i, epoch %d, cost ' % (i, epoch),
            print np.mean(c)

    end_time = time.clock()

    print >> sys.stderr, ('Layer-wise training ran for %.2fm' % ((end_time - start_time) / 60.))
    
    ########################
    # FINETUNING THE MODEL #
    ########################

    
    datasets = [(train_set_x,train_set_y),(valid_set_x,valid_set_y),(test_set_x,test_set_y)]
    # get the training, validation and testing function for the model
    print '... getting the finetuning functions'
    train_fn, validate_model, test_model,output_fn = sda.build_finetune_functions(
        datasets=datasets,
        batch_size=batch_size,
        learning_rate=finetune_lr
    )
    
    print '... finetuning of model'
    
    for n in xrange(training_epochs):
        costs = []
        for i in xrange(n_train_batches):
            costs.append(train_fn(i))
        
        cost = np.mean(costs)    
        #val_cost = validate_model()
        
        print "Epoch:",n,"Cost:",cost #,"Validation cost:",val_cost
    
    print "Test cost:", test_model()
     
    out = np.zeros((0,sample_size**2))
    for batch_index in xrange(train_set_x.get_value().shape[0]):
        out = np.vstack((out,output_fn(batch_index)))
        
    img_output = process.post_process(out, table, sample_size,img_shape=img_size_test)
        
    plt.figure()
    plt.imshow(test_set[0],cmap=plt.cm.gray)
    plt.figure()
    plt.imshow(img_output[0],cmap=plt.cm.gray)
    
    xz = process.xz_stack(img_input)
    
    for m in xrange(xz.shape[0]):
        for n in xrange(xz.shape[1]):
            xz[m,n] = (xz[m,n]-xz[m,n].mean())/xz[m,n].std()
    
    xz_train, table    =process.generate_set(xz, sample_size = sample_size, stride = sample_size, img_size = img_size_test)
    xz_train = xz_train.astype(np.float32)
    test_set_x.set_value(xz_train)
    
    out = np.zeros((0,sample_size**2))
    for batch_index in xrange(train_set_x.get_value().shape[0]):
        out = np.vstack((out,output_fn(batch_index)))
        
    img_output = process.post_process(out, table, sample_size,img_shape=img_size_test)
    
    plt.figure()
    plt.imshow(xz[0],cmap=plt.cm.gray)
    plt.figure()
    plt.imshow(img_output[0],cmap=plt.cm.gray)
    
    plt.show()
Beispiel #19
0
def test_restrict_norm_SdA(num_epochs=10, pretrain_lr=0.00001, lr_decay = 0.98, batch_size=20):
    """
    
    Pretrain an SdA model for the given number of training epochs, applying norm restrictions on the W matrices.  Try ReLU units, since their weights seem to blow up 
    on this data set.

    :type num_epochs: int
    :param num_epochs: number of epoch to do pretraining

    :type pretrain_lr: float
    :param pretrain_lr: learning rate to be used during pre-training

    :type batch_size: int
    :param batch_size: train in mini-batches of this size

    """
    
    layer_types=['ReLU','ReLU']
    current_dir = os.getcwd()       
    os.chdir(options.dir)
    today = datetime.today()
    day = str(today.date())
    hour = str(today.time())
    output_filename = "test_max_norm_sda_." + '_'.join([elem for elem in layer_types]) + day + "." + hour
    output_file = open(output_filename,'w')
    os.chdir(current_dir)    
    print >> output_file, "Run on " + str(datetime.now())    
    
    # Get the training data sample from the input file
    data_set_file = openFile(str(options.inputfile), mode = 'r')
    datafiles = extract_unlabeled_chunkrange(data_set_file, num_files = 10)
    train_set_x = load_data_unlabeled(datafiles, features = (5,20))
    data_set_file.close()

    # compute number of minibatches for training, validation and testing
    n_train_batches, n_features = train_set_x.get_value(borrow=True).shape
    n_train_batches /= batch_size
    
    # numpy random generator
    numpy_rng = numpy.random.RandomState(89677)
    print '... building the model'
    
    # Set the initial value of the learning rate
    learning_rate = theano.shared(numpy.asarray(pretrain_lr, 
                                             dtype=theano.config.floatX))
    
    # Function to decrease the learning rate
    decay_learning_rate = theano.function(inputs=[], outputs=learning_rate,
                    updates={learning_rate: learning_rate * lr_decay})    

    sda_model = SdA(numpy_rng=numpy_rng, n_ins=n_features,
              hidden_layers_sizes=[5, 5],
              corruption_levels = [0.25, 0.25],
              layer_types=layer_types)

    #########################
    # PRETRAINING THE MODEL #
    #########################
    print '... getting the pretraining functions'
    pretraining_fns = sda_model.pretraining_functions(train_set_x=train_set_x,
                                                batch_size=batch_size,
                                                learning_rate=learning_rate)

    #print '... dumping pretraining functions to output file pre pickling'
    #print >> output_file, 'Pretraining functions, pre pickling'
    #for i in xrange(sda.n_layers):
        #theano.printing.debugprint(pretraining_fns[i], file = output_file, print_type=True) 
    print '... getting the max-norm regularization functions'
    max_norm_regularization_fns = sda_model.max_norm_regularization()

    print '... pre-training the model'
    start_time = time.clock()
    ## Pre-train layer-wise
    corruption_levels = [float(options.corruption), float(options.corruption)]
    for i in xrange(sda_model.n_layers):
        
        for epoch in xrange(num_epochs):
            # go through the training set
            c = []
            for batch_index in xrange(n_train_batches):
                c.append(pretraining_fns[i](index=batch_index,
                         corruption=corruption_levels[i]))
                # regularize weights here
                scale = max_norm_regularization_fns[i](norm_limit=options.norm_limit)               
            print >> output_file, 'Pre-training layer %i, epoch %d, cost ' % (i, epoch),
            print >> output_file, numpy.mean(c)
            print >> output_file, 'Learning rate '
            print >> output_file, learning_rate.get_value(borrow=True)
            print >> output_file, 'Scale ', scale
            decay_learning_rate()

    end_time = time.clock()

    print >> output_file, ('Pretraining time for file ' +
                          os.path.split(__file__)[1] +
                          ' was %.2fm to go through %i epochs' % (((end_time - start_time) / 60.), (num_epochs / 2)))

    # Pickle the SdA
    print >> output_file, 'Pickling the model...'
    f = file(options.savefile, 'wb')
    cPickle.dump(sda_model, f, protocol=cPickle.HIGHEST_PROTOCOL)
    f.close()    
    
    # Unpickle the SdA
    print >> output_file, 'Unpickling the model...'
    f = file(options.savefile, 'rb')
    pickled_sda = cPickle.load(f)
    f.close()    
    
    
    # Test that the W-matrices and biases for the dA layers in sda are all close to the W-matrices 
    # and biases freshly unpickled
    for i in xrange(pickled_sda.n_layers):
        pickled_dA_params = pickled_sda.dA_layers[i].get_params()
        fresh_dA_params = sda_model.dA_layers[i].get_params()
        if not numpy.allclose(pickled_dA_params[0].get_value(), fresh_dA_params[0].get_value()):
            print >> output_file, ("numpy says that Ws in layer %i are not close" % (i))
            print >> output_file, "Norm for pickled dA " + pickled_dA_params[0].name  + ": " 
            print >> output_file, norm(pickled_dA_params[0].get_value())
            print >> output_file, "Values for pickled dA " + pickled_dA_params[0].name  + ": " 
            print >> output_file, numpy.array_repr(pickled_dA_params[0].get_value())
            print >> output_file, "Norm for fresh dA " + fresh_dA_params[0].name + ": " 
            print >> output_file, norm(fresh_dA_params[0].get_value())
            print >> output_file, "Values for fresh dA " + fresh_dA_params[0].name  + ": " 
            print >> output_file, numpy.array_repr(fresh_dA_params[0].get_value())            
        if not numpy.allclose(pickled_dA_params[1].get_value(), fresh_dA_params[1].get_value()):
            print >> output_file, ("numpy says that the biases in layer %i are not close" % (i))
            print >> output_file, "Norm for pickled dA " + pickled_dA_params[1].name + ": " 
            print >> output_file, norm(pickled_dA_params[1].get_value())
            print >> output_file, "Values for pickled dA " + pickled_dA_params[1].name + ": " 
            print >> output_file, numpy.array_repr(pickled_dA_params[1].get_value())            
            print >> output_file, "Norm for fresh dA " + fresh_dA_params[1].name + ": " 
            print >> output_file, norm(fresh_dA_params[1].get_value())
            print >> output_file, "Values for fresh dA " + pickled_dA_params[1].name + ": " 
            print >> output_file, numpy.array_repr(pickled_dA_params[1].get_value())            
    
    output_file.close()