Example #1
    def redo_theano(self):
        """ Compiles the theano function for the default learning rule """

        init_names = dir(self)

        minibatch = tensor.matrix()

        optimizer = SGDOptimizer(self, self.base_lr, self.anneal_start)

        sampler = BlockGibbsSampler(
            self,
            0.5 + np.zeros((self.nchains, self.get_input_dim())),
            self.rng,
            steps=self.sml_gibbs_steps)

        updates = training_updates(visible_batch=minibatch,
                                   model=self,
                                   sampler=sampler,
                                   optimizer=optimizer)

        self.learn_func = theano.function([minibatch], updates=updates)

        final_names = dir(self)

        self.register_names_to_del(
            [name for name in final_names if name not in init_names])
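
The dir(self) bookkeeping above is deliberate: every attribute created between the two dir() calls (most importantly the compiled learn_func) gets registered for deletion, presumably so the model can be pickled without its unpicklable Theano function and recompiled later by calling redo_theano() again. A minimal sketch of how the compiled function might be driven, assuming a model exposing this method and some iterable of 2-D numpy arrays (both names are hypothetical):

model.redo_theano()            # compile learn_func
for batch in batches:          # hypothetical minibatch source
    model.learn_func(batch)    # one SML/SGD update per minibatch
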
Example #2
        #'lr_vb': 0.10,
        'irange': 0.001,
    }

    # A symbolic input representing your minibatch.
    minibatch = tensor.matrix()
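    # Debugging aid: Print is an identity op that dumps each minibatch
    # value (tagged 'min') every time the compiled function runs.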
    minibatch = theano.printing.Print('min')(minibatch)

    # Allocate a contractive autoencoder.
    cae = ContractiveAutoencoder(conf['nvis'], conf['nhid'],
                                 conf['act_enc'], conf['act_dec'])

    # Allocate an optimizer, which tells us how to update our model.
    cost = SquaredError(cae)(minibatch, cae.reconstruct(minibatch)).mean()
    cost += cae.contraction_penalty(minibatch).mean()
    trainer = SGDOptimizer(cae, conf['base_lr'], conf['anneal_start'])
    updates = trainer.cost_updates(cost)

    # Finally, build a Theano function out of all this.
    train_fn = theano.function([minibatch], cost, updates=updates)

    # Suppose we want minibatches of size 10
    batchsize = 10

    # Here's a manual training loop. I hope to have some classes that
    # automate this a little bit.
    for epoch in xrange(5):
        for offset in xrange(0, data.shape[0], batchsize):
            minibatch_err = train_fn(data[offset:(offset + batchsize)])
            print ("epoch %d, batch %d-%d: %f" %
                   (epoch, offset, offset + batchsize - 1, minibatch_err))
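
None of the snippets on this page show their imports. They assume roughly the preamble below; the pylearn2 module paths are guesses based on early layouts of the library (pylearn2.autoencoder, pylearn2.corruption and pylearn2.cost are at least consistent with Example #6's use of .get()) and may differ in your checkout:

import numpy
import theano
from theano import tensor
from pylearn2.autoencoder import ContractiveAutoencoder, DenoisingAutoencoder
from pylearn2.corruption import GaussianCorruptor
from pylearn2.cost import SquaredError
from pylearn2.optimizer import SGDOptimizer  # path assumed
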
Example #3
    }
    print '== training =='
    # A symbolic input representing your minibatch.
    minibatch = tensor.matrix()

    # Allocate a denoising autoencoder with Gaussian noise corruption.
    corruptor = GaussianCorruptor(corruption_level=conf['corruption_level'])
    da = DenoisingAutoencoder(corruptor, conf['nvis'], conf['nhid'],
                              conf['act_enc'], conf['act_dec'],
                              tied_weights=conf['tied_weights'],
                              irange=conf['irange'])

    # Allocate an optimizer, which tells us how to update our model.
    # TODO: build the cost another way
    cost = SquaredError(da)(minibatch, da.reconstruct(minibatch)).mean()
    trainer = SGDOptimizer(da.params(), conf['base_lr'], conf['anneal_start'])

    # Finally, build a Theano function out of all this.
    train_fn = theano.function([minibatch], cost,
                               updates=trainer.cost_updates(cost))

    # Suppose we want minibatches of size 10
    batchsize = 10

    # Here's a manual training loop. I hope to have some classes that
    # automate this a little bit.
    for epoch in xrange(10):
        for offset in xrange(0, train_data.shape[0], batchsize):
            minibatch_err = train_fn(train_data[offset:(offset + batchsize)])
            #print "epoch %d, batch %d-%d: %f" % \
                    #(epoch, offset, offset + batchsize - 1, minibatch_err)
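
The "build the cost another way" TODO could be satisfied by writing the mean squared reconstruction error directly in Theano instead of going through the SquaredError class. A sketch, assuming da.reconstruct returns a symbolic batch with the same shape as its input:

reconstructed = da.reconstruct(minibatch)
cost = tensor.sqr(minibatch - reconstructed).mean()
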
Example #4
        'nhid': 30,
        'rbm_seed': 1,
        'batch_size': 100,
        'base_lr': 1e-4,
        'anneal_start': 1,
        'pcd_steps': 1,
    }

    rbm = GaussianBinaryRBM(nvis=conf['nvis'], nhid=conf['nhid'],
                            irange=0.5, energy_function_class=GRBM_Type_1)
    rng = numpy.random.RandomState(seed=conf.get('rbm_seed', 42))
    sampler = BlockGibbsSampler(rbm, data[0:100], rng,
                                steps=conf['pcd_steps'])
    minibatch = tensor.matrix()

    optimizer = SGDOptimizer(rbm, conf['base_lr'], conf['anneal_start'])
    updates = training_updates(visible_batch=minibatch, model=rbm,
                               sampler=sampler, optimizer=optimizer)

    proxy_cost = rbm.reconstruction_error(minibatch, rng=sampler.s_rng)
    train_fn = theano.function([minibatch], proxy_cost, updates=updates)

    vis = tensor.matrix('vis')
    free_energy_fn = theano.function([vis], rbm.free_energy_given_v(vis))

    #utils.debug.setdebug()

    recon = []
    nlls = []
    for j in range(0, 401):
        avg_rec_error = 0
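
The example is cut off inside its monitoring loop. A hedged guess at the kind of body that would follow, accumulating reconstruction error over minibatches; this is reconstruction on my part, not the original source:

        for offset in range(0, data.shape[0], conf['batch_size']):
            batch = data[offset:offset + conf['batch_size']]
            avg_rec_error += train_fn(batch) * batch.shape[0]
        recon.append(avg_rec_error / data.shape[0])   # mean error this pass
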
Example #5
def main_train(epochs,
               batchsize,
               solution='',
               sparse_penalty=0,
               sparsityTarget=0,
               sparsityTargetPenalty=0):

    # Experiment specific arguments
    conf_dataset = {
        'dataset': 'avicenna',
        'expname': 'dummy',  # Used to create the submission file
        'transfer': True,
        'normalize': True,  # (Default = True)
        'normalize_on_the_fly': False,  # (Default = False)
        'randomize_valid': True,  # (Default = True)
        'randomize_test': True,  # (Default = True)
        'saving_rate': 0,  # (Default = 0)
        'savedir': './outputs',
    }

    # First layer = PCA-75 whiten
    pca_layer = {
        'name': '1st-PCA',
        'num_components': 75,
        'min_variance': -50,
        'whiten': True,
        'pca_class': 'CovEigPCA',
        # Training properties
        'proba': [1, 0, 0],
        'savedir': './outputs',
    }

    # Load the dataset
    data = utils.load_data(conf_dataset)

    if conf_dataset['transfer']:
        # Data for the ALC proxy
        label = data[3]
        data = data[:3]

    # First layer : train or load a PCA
    pca = create_pca(conf_dataset, pca_layer, data, model=pca_layer['name'])
    data = [
        utils.sharedX(pca.function()(set.get_value(borrow=True)), borrow=True)
        for set in data
    ]
    '''
    if conf_dataset['transfer']:
        data_train, label_train = utils.filter_labels(data[0], label)
      
        alc = embed.score(data_train, label_train)
        print '... resulting ALC on train (for PCA) is', alc
    '''

    nvis = utils.get_constant(data[0].shape[1]).item()

    conf = {
        'corruption_level': 0.1,
        'nhid': 200,
        'nvis': nvis,
        'anneal_start': 100,
        'base_lr': 0.001,
        'tied_weights': True,
        'act_enc': 'sigmoid',
        'act_dec': None,
        #'lr_hb': 0.10,
        #'lr_vb': 0.10,
        'solution': solution,
        'sparse_penalty': sparse_penalty,
        'sparsityTarget': sparsityTarget,
        'sparsityTargetPenalty': sparsityTargetPenalty,
        'irange': 0,
    }

    # A symbolic input representing your minibatch.
    minibatch = tensor.matrix()

    # Allocate a denoising autoencoder with binomial noise corruption.
    corruptor = GaussianCorruptor(conf['corruption_level'])
    da = DenoisingAutoencoder(corruptor, conf['nvis'], conf['nhid'],
                              conf['act_enc'], conf['act_dec'],
                              conf['tied_weights'], conf['solution'],
                              conf['sparse_penalty'], conf['sparsityTarget'],
                              conf['sparsityTargetPenalty'])

    # Allocate an optimizer, which tells us how to update our model.
    # TODO: build the cost another way
    cost = SquaredError(da)(minibatch, da.reconstruct(minibatch)).mean()
    trainer = SGDOptimizer(da, conf['base_lr'], conf['anneal_start'])
    updates = trainer.cost_updates(cost)

    # Finally, build a Theano function out of all this.
    train_fn = theano.function([minibatch], cost, updates=updates)

    # Suppose we want minibatches of size 10
    proba = utils.getboth(conf, pca_layer, 'proba')
    iterator = BatchIterator(data, proba, batchsize)

    # Here's a manual training loop. I hope to have some classes that
    # automate this a little bit.
    final_cost = 0
    for epoch in xrange(epochs):
        c = []
        for minibatch_data in iterator:
            minibatch_err = train_fn(minibatch_data)
            c.append(minibatch_err)
        final_cost = numpy.mean(c)
        print "epoch %d, cost : %f" % (epoch, final_cost)

    print '############################## End of the experiment ############################'
    print 'Computing the ALC:'
    if conf_dataset['transfer']:
        data_train, label_train = utils.filter_labels(data[0], label)
        alc = embed.score(data_train, label_train)

        print 'Solution:', solution
        print 'sparse_penalty =', sparse_penalty
        print 'sparsityTarget =', sparsityTarget
        print 'sparsityTargetPenalty =', sparsityTargetPenalty
        print 'Final denoising error is:', final_cost
        print '... resulting ALC on train is', alc
        return (alc, final_cost)
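
A hypothetical invocation of main_train; the argument values, and the 'solution' option string in particular, are illustrative assumptions rather than values taken from the source:

alc, final_cost = main_train(epochs=20, batchsize=20,
                             solution='l1',           # option name assumed
                             sparse_penalty=0.1,
                             sparsityTarget=0.05,
                             sparsityTargetPenalty=0.001)
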
Example #6
    MyCorruptor = pylearn2.corruption.get(name)
    corruptor = MyCorruptor(layer.get('corruption_level', 0))

    # Allocate a denoising or contractive autoencoder
    MyAutoencoder = pylearn2.autoencoder.get(clsname)
    ae = MyAutoencoder.fromdict(layer, corruptor=corruptor)

    # Allocate an optimizer, which tells us how to update our model.
    MyCost = pylearn2.cost.get(layer['cost_class'])
    varcost = MyCost(ae)(minibatch, ae.reconstruct(minibatch))
    if isinstance(ae, ContractiveAutoencoder):
        alpha = layer.get('contracting_penalty', 0.1)
        penalty = alpha * ae.contraction_penalty(minibatch)
        varcost = varcost + penalty
    varcost = varcost.mean()
    trainer = SGDOptimizer(ae, layer['base_lr'], layer['anneal_start'])
    updates = trainer.cost_updates(varcost)

    # Finally, build a Theano function out of all this.
    train_fn = theano.function([minibatch],
                               varcost,
                               updates=updates,
                               name='train_fn')

    # Here's a manual training loop.
    print '... training layer:', clsname
    start_time = time.clock()
    proba = utils.getboth(layer, conf, 'proba')
    iterator = BatchIterator(data, proba, layer['batch_size'])
    saving_counter = 0
    saving_rate = utils.getboth(layer, conf, 'saving_rate', 0)
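
This snippet also stops before the loop body. One way an epoch loop with periodic checkpointing could look, given the counters set up above; the 'epochs' key and the save() call are assumptions, not a confirmed pylearn2 API:

for epoch in xrange(layer['epochs']):        # 'epochs' key assumed
    for minibatch_data in iterator:
        train_fn(minibatch_data)
    if saving_rate != 0:
        saving_counter += 1
        if saving_counter % saving_rate == 0:
            ae.save('%s_epoch_%d.pkl' % (clsname, epoch))  # save() assumed
print '... training done in %.2fs' % (time.clock() - start_time)
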
Example #7
            W_irange=conf['W_irange'],
            rng=rng)

    sampler = PersistentCDSampler(
        rbm,
        data[0:conf['batch_size']],
        rng,
        steps=conf['pcd_steps'],
        particles_clip=(conf['particles_min'], conf['particles_max']),
    )
    minibatch = tensor.matrix()

    optimizer = SGDOptimizer(
        rbm,
        conf['base_lr'],
        conf['anneal_start'],
        log_alpha_clip=(numpy.log(conf['alpha_min']),
                        numpy.log(conf['alpha_max'])),
        B_clip=(conf['B_min'], conf['B_max']),
        Lambda_clip=(conf['Lambda_min'], conf['Lambda_max']),
    )
    updates = training_updates(visible_batch=minibatch, model=rbm,
                               sampler=sampler, optimizer=optimizer)

    proxy_cost = rbm.reconstruction_error(minibatch, rng=sampler.s_rng)
    train_fn = theano.function([minibatch], proxy_cost, updates=updates)

    vis = tensor.matrix('vis')
    free_energy_fn = theano.function([vis], rbm.free_energy_given_v(vis))

    utils.debug.setdebug()

    recon = []
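
free_energy_fn can double as a cheap overfitting monitor: comparing the mean free energy of a held-out subset against a training subset of the same size is a standard RBM heuristic (a growing gap suggests overfitting). train_subset and valid_subset below are hypothetical arrays:

gap = (free_energy_fn(valid_subset).mean()
       - free_energy_fn(train_subset).mean())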