Esempi in Python per SdA.max_norm_regularization

Linguaggio di programmazione: Python

Spazio dei nomi/nome del pacchetto: SdA

Classe/tipologia: SdA

Metodo/funzione: max_norm_regularization

Esempi su hotexamples.com: 2

SdA.max_norm_regularization in Python: 2 esempi trovati. Questi sono i migliori esempi reali in Python per SdA.SdA.max_norm_regularization, estratti da progetti open source. Puoi valutarli per aiutarci a migliorare la qualità dei nostri esempi.

Metodi utilizzati di frequente

Mostra Nascondi

SdA(8)

pretraining_functions(5)

build_finetune_functions(3)

max_norm_regularization(2)

save_params(2)

build_finetune_full_reconstruction(1)

build_finetune_limited_reconstruction(1)

build_predict_function(1)

change_lastlayer(1)

load(1)

predicter(1)

save(1)

Esempio n. 1

Mostra file

File: hybrid_pretrain_SdA_multiproc.py Progetto: lzamparo/SdA_reduce

def pretrain(shared_args, private_args): 
    """ Pretrain an SdA model for the given number of training epochs.  The model is either initialized from 
    scratch, or is reconstructed from a previously pickled model.

    :type shared_args: dict
    :param shared_args: dict containing all the arguments common to both models.

    :type private_args: dict
    :param private_args: dict containing all the arguments specific to each model spawned off this first process.
    
    """
    
    # Import sandbox.cuda to bind the specified GPU to this subprocess
    # then import the remaining theano and model modules.
    import theano.sandbox.cuda
    theano.sandbox.cuda.use(private_args['gpu'])
    
    import theano
    import theano.tensor as T
    from theano.tensor.shared_randomstreams import RandomStreams
    from SdA import SdA    
    
    shared_args_dict = shared_args[0]
    
    current_dir = os.getcwd()    
    
    os.chdir(shared_args_dict['dir'])
    today = datetime.today()
    day = str(today.date())
    hour = str(today.time())
    arch_list = get_arch_list(private_args)            
    corruption_list = [shared_args_dict['corruption'] for i in arch_list]
    layer_types = parse_layer_type(shared_args_dict['layertype'], len(arch_list))    
    
    output_filename = "hybrid_pretraining_sda_" + "_".join(elem for elem in layer_types) + private_args['arch'] + "." + day + "." + hour
    output_file = open(output_filename,'w')
    os.chdir(current_dir)    
    print >> output_file, "Run on " + str(datetime.now())    
    
    # Get the training data sample from the input file
    data_set_file = openFile(str(shared_args_dict['input']), mode = 'r')
    datafiles = extract_unlabeled_chunkrange(data_set_file, num_files = 30, offset = shared_args_dict['offset'])
    if datafiles is None:
            print("No data was returned, exiting.")
            data_set_file.close()
            output_file.close()
            return    
    
    train_set_x = load_data_unlabeled(datafiles)

    # DEBUG: get validation set too
    validation_datafiles = extract_unlabeled_chunkrange(data_set_file, num_files = 5, offset = shared_args_dict['offset'] + 30)
    valid_set_x = load_data_unlabeled(validation_datafiles)      
    data_set_file.close()

    # compute number of minibatches for training, validation and testing
    n_train_batches, n_features = train_set_x.get_value(borrow=True).shape
    n_train_batches /= shared_args_dict['batch_size']
    
    # numpy random generator
    numpy_rng = numpy.random.RandomState(89677)
    
    # Set the initial value of the learning rate
    learning_rate = theano.shared(numpy.asarray(shared_args_dict['pretrain_lr'], 
                                             dtype=theano.config.floatX))     
    
    
    # Check if we can restore from a previously trained model,    
    # otherwise construct a new SdA
    if private_args.has_key('restore'):
        print >> output_file, 'Unpickling the model from %s ...' % (private_args['restore'])
        current_dir = os.getcwd()    
        os.chdir(shared_args_dict['dir'])         
        f = file(private_args['restore'], 'rb')
        sda_model = cPickle.load(f)
        f.close()        
        os.chdir(current_dir)
    else:
        print '... building the model'  
        
        sda_model = SdA(numpy_rng=numpy_rng, n_ins=n_features,
              hidden_layers_sizes=arch_list,
              corruption_levels = corruption_list,
              layer_types=layer_types,
              loss=shared_args_dict['loss'],
              n_outs=-1,
              sparse_init=shared_args_dict['sparse_init'],
              opt_method=shared_args_dict['opt_method'])

    #########################
    # PRETRAINING THE MODEL #
    #########################    
    
    print '... getting the pretraining functions'
    pretraining_fns = sda_model.pretraining_functions(train_set_x=train_set_x,
                                                batch_size=shared_args_dict['batch_size'],
                                                learning_rate=learning_rate,
                                                method='cm')

    print '... getting the hybrid training functions'
    hybrid_pretraining_fns = sda_model.build_finetune_limited_reconstruction(train_set_x=train_set_x, 
                                                                      batch_size=shared_args_dict['batch_size'], 
                                                                      learning_rate=learning_rate,
                                                                      method='cm')
    
    # DEBUG: get full finetuning theano function
    # get the training, validation function for the model
    datasets = (train_set_x,valid_set_x)
        
    print '... getting the finetuning functions'
    finetune_train_fn, validate_model = sda_model.build_finetune_full_reconstruction(
                datasets=datasets, batch_size=shared_args_dict['batch_size'],
                learning_rate=learning_rate,
                method='cm')    

    
    # DEBUG: should only have n_layers - 2 hybrid pretraining functions
    assert len(hybrid_pretraining_fns) == sda_model.n_layers - 2
    
    print '... writing meta-data to output file'
    metadict = {'n_train_batches': n_train_batches}
    metadict = dict(metadict.items() + shared_args_dict.items())
    write_metadata(output_file, metadict)    
    
    print '... pre-training the model'
    start_time = time.clock()
    
    # Get corruption levels from the SdA.  
    corruption_levels = sda_model.corruption_levels
    
    # Function to decrease the learning rate
    decay_learning_rate = theano.function(inputs=[], outputs=learning_rate,
                updates={learning_rate: learning_rate * shared_args_dict['lr_decay']})  
    
    # Function to reset the learning rate
    lr_val = T.scalar('original_lr')
    reset_learning_rate = theano.function(inputs=[lr_val], outputs=learning_rate,
                updates={learning_rate: lr_val})
    
    # Set up functions for max norm regularization
    apply_max_norm_regularization = sda_model.max_norm_regularization()  
    
    for i in xrange(sda_model.n_layers):       
                
        for epoch in xrange(shared_args_dict['pretraining_epochs']):
            # go through the training set
            c = []
            for batch_index in xrange(n_train_batches):
                c.append(pretraining_fns[i](index=batch_index,
                         corruption=corruption_levels[i],momentum=shared_args_dict['momentum']))
                                
            print >> output_file, 'Pre-training layer %i, epoch %d, cost ' % (i, epoch),
            print >> output_file, numpy.mean(c)
            print >> output_file, learning_rate.get_value(borrow=True)
            decay_learning_rate()
            apply_max_norm_regularization(norm_limit=shared_args_dict['maxnorm'])
        
        # Do hybrid pretraining only on the middle layer(s)
        if i > 0 and i < sda_model.n_layers - 1:
            for h_epoch in xrange(20):
                hybrid_c = []
                for batch_index in xrange(n_train_batches):
                    hybrid_c.append(hybrid_pretraining_fns[i-1](index=batch_index,momentum=shared_args_dict['momentum']))  
                print >> output_file, "Hybrid pre-training on layers %i and below, epoch %d, cost" % (i, h_epoch),
                print >> output_file, numpy.mean(hybrid_c)
        
        # Reset the learning rate
        reset_learning_rate(numpy.asarray(shared_args_dict['pretrain_lr'], dtype=numpy.float32))
        
        if private_args.has_key('save'):
            print >> output_file, 'Pickling the model...'
            current_dir = os.getcwd()    
            os.chdir(shared_args_dict['dir'])            
            f = file(private_args['save'], 'wb')
            cPickle.dump(sda_model, f, protocol=cPickle.HIGHEST_PROTOCOL)
            f.close()
            os.chdir(current_dir)

    print '... finetuning with final layer'
    best_validation_loss = numpy.inf
    for f_epoch in xrange(20):
        for minibatch_index in xrange(n_train_batches):
            minibatch_avg_cost = finetune_train_fn(minibatch_index, shared_args_dict['momentum'])
                    
            # DEBUG: monitor the training error
            print >> output_file, ('Fine-tuning epoch %i, minibatch %i/%i, training error %f ' %
                    (f_epoch, minibatch_index + 1, n_train_batches,
                    minibatch_avg_cost))            
    
            # apply max-norm regularization
            apply_max_norm_regularization(shared_args_dict['maxnorm'])          
    
        # validate every epoch               
        validation_losses = validate_model()
        this_validation_loss = numpy.mean(validation_losses)
        
        # save best model that achieved this best loss  
        if this_validation_loss < best_validation_loss:  
            print >> output_file, 'Pickling the model...'  
            current_dir = os.getcwd()    
            os.chdir(shared_args_dict['dir'])            
            f = file(private_args['save'], 'wb')
            cPickle.dump(sda_model, f, protocol=cPickle.HIGHEST_PROTOCOL)
            f.close()
            os.chdir(current_dir)
            
        print >> output_file, ('epoch %i, minibatch %i/%i, validation error %f ' %
              (f_epoch, minibatch_index + 1, n_train_batches,
               this_validation_loss))        

    end_time = time.clock()

    print >> output_file, ('The hybrid training code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
    output_file.close()

Esempio n. 2

Mostra file

File: test_max_norm.py Progetto: lzamparo/SdA_reduce

def test_restrict_norm_SdA(num_epochs=10, pretrain_lr=0.00001, lr_decay = 0.98, batch_size=20):
    """
    
    Pretrain an SdA model for the given number of training epochs, applying norm restrictions on the W matrices.  Try ReLU units, since their weights seem to blow up 
    on this data set.

    :type num_epochs: int
    :param num_epochs: number of epoch to do pretraining

    :type pretrain_lr: float
    :param pretrain_lr: learning rate to be used during pre-training

    :type batch_size: int
    :param batch_size: train in mini-batches of this size

    """
    
    layer_types=['ReLU','ReLU']
    current_dir = os.getcwd()       
    os.chdir(options.dir)
    today = datetime.today()
    day = str(today.date())
    hour = str(today.time())
    output_filename = "test_max_norm_sda_." + '_'.join([elem for elem in layer_types]) + day + "." + hour
    output_file = open(output_filename,'w')
    os.chdir(current_dir)    
    print >> output_file, "Run on " + str(datetime.now())    
    
    # Get the training data sample from the input file
    data_set_file = openFile(str(options.inputfile), mode = 'r')
    datafiles = extract_unlabeled_chunkrange(data_set_file, num_files = 10)
    train_set_x = load_data_unlabeled(datafiles, features = (5,20))
    data_set_file.close()

    # compute number of minibatches for training, validation and testing
    n_train_batches, n_features = train_set_x.get_value(borrow=True).shape
    n_train_batches /= batch_size
    
    # numpy random generator
    numpy_rng = numpy.random.RandomState(89677)
    print '... building the model'
    
    # Set the initial value of the learning rate
    learning_rate = theano.shared(numpy.asarray(pretrain_lr, 
                                             dtype=theano.config.floatX))
    
    # Function to decrease the learning rate
    decay_learning_rate = theano.function(inputs=[], outputs=learning_rate,
                    updates={learning_rate: learning_rate * lr_decay})    

    sda_model = SdA(numpy_rng=numpy_rng, n_ins=n_features,
              hidden_layers_sizes=[5, 5],
              corruption_levels = [0.25, 0.25],
              layer_types=layer_types)

    #########################
    # PRETRAINING THE MODEL #
    #########################
    print '... getting the pretraining functions'
    pretraining_fns = sda_model.pretraining_functions(train_set_x=train_set_x,
                                                batch_size=batch_size,
                                                learning_rate=learning_rate)

    #print '... dumping pretraining functions to output file pre pickling'
    #print >> output_file, 'Pretraining functions, pre pickling'
    #for i in xrange(sda.n_layers):
        #theano.printing.debugprint(pretraining_fns[i], file = output_file, print_type=True) 
    print '... getting the max-norm regularization functions'
    max_norm_regularization_fns = sda_model.max_norm_regularization()

    print '... pre-training the model'
    start_time = time.clock()
    ## Pre-train layer-wise
    corruption_levels = [float(options.corruption), float(options.corruption)]
    for i in xrange(sda_model.n_layers):
        
        for epoch in xrange(num_epochs):
            # go through the training set
            c = []
            for batch_index in xrange(n_train_batches):
                c.append(pretraining_fns[i](index=batch_index,
                         corruption=corruption_levels[i]))
                # regularize weights here
                scale = max_norm_regularization_fns[i](norm_limit=options.norm_limit)               
            print >> output_file, 'Pre-training layer %i, epoch %d, cost ' % (i, epoch),
            print >> output_file, numpy.mean(c)
            print >> output_file, 'Learning rate '
            print >> output_file, learning_rate.get_value(borrow=True)
            print >> output_file, 'Scale ', scale
            decay_learning_rate()

    end_time = time.clock()

    print >> output_file, ('Pretraining time for file ' +
                          os.path.split(__file__)[1] +
                          ' was %.2fm to go through %i epochs' % (((end_time - start_time) / 60.), (num_epochs / 2)))

    # Pickle the SdA
    print >> output_file, 'Pickling the model...'
    f = file(options.savefile, 'wb')
    cPickle.dump(sda_model, f, protocol=cPickle.HIGHEST_PROTOCOL)
    f.close()    
    
    # Unpickle the SdA
    print >> output_file, 'Unpickling the model...'
    f = file(options.savefile, 'rb')
    pickled_sda = cPickle.load(f)
    f.close()    
    
    
    # Test that the W-matrices and biases for the dA layers in sda are all close to the W-matrices 
    # and biases freshly unpickled
    for i in xrange(pickled_sda.n_layers):
        pickled_dA_params = pickled_sda.dA_layers[i].get_params()
        fresh_dA_params = sda_model.dA_layers[i].get_params()
        if not numpy.allclose(pickled_dA_params[0].get_value(), fresh_dA_params[0].get_value()):
            print >> output_file, ("numpy says that Ws in layer %i are not close" % (i))
            print >> output_file, "Norm for pickled dA " + pickled_dA_params[0].name  + ": " 
            print >> output_file, norm(pickled_dA_params[0].get_value())
            print >> output_file, "Values for pickled dA " + pickled_dA_params[0].name  + ": " 
            print >> output_file, numpy.array_repr(pickled_dA_params[0].get_value())
            print >> output_file, "Norm for fresh dA " + fresh_dA_params[0].name + ": " 
            print >> output_file, norm(fresh_dA_params[0].get_value())
            print >> output_file, "Values for fresh dA " + fresh_dA_params[0].name  + ": " 
            print >> output_file, numpy.array_repr(fresh_dA_params[0].get_value())            
        if not numpy.allclose(pickled_dA_params[1].get_value(), fresh_dA_params[1].get_value()):
            print >> output_file, ("numpy says that the biases in layer %i are not close" % (i))
            print >> output_file, "Norm for pickled dA " + pickled_dA_params[1].name + ": " 
            print >> output_file, norm(pickled_dA_params[1].get_value())
            print >> output_file, "Values for pickled dA " + pickled_dA_params[1].name + ": " 
            print >> output_file, numpy.array_repr(pickled_dA_params[1].get_value())            
            print >> output_file, "Norm for fresh dA " + fresh_dA_params[1].name + ": " 
            print >> output_file, norm(fresh_dA_params[1].get_value())
            print >> output_file, "Values for fresh dA " + pickled_dA_params[1].name + ": " 
            print >> output_file, numpy.array_repr(pickled_dA_params[1].get_value())            
    
    output_file.close()