Example No. 1
def run_SAE_experiment(pretrain_lr=0.1,
                       pretraining_epochs=3300,
                       finetune_lr=0.1,
                       training_epochs=4e5,
                       L1_reg=0.0,
                       L2_reg=1e-4,
                       dataset='KSC.pkl',
                       split_proportions=[6, 2, 2],
                       hidden_layers_sizes=[20],
                       corruption_levels=[0.],
                       batch_size=20,
                       log_file='log',
                       restart=False,
                       use_rate_schedule=True,
                       load_pretrained_weights=False):
    """
    Reproduce the paper...
    """
    assert not (restart and load_pretrained_weights)
    assert not (load_pretrained_weights and len(hidden_layers_sizes) != 5)
    assert len(hidden_layers_sizes)==len(corruption_levels), \
           "Error: hidden_layers_sizes and corruption_levels need to be of equal length"

    pretrain_rate_decay = isinstance(pretrain_lr, tuple)
    train_rate_decay = isinstance(finetune_lr, tuple)
    assert pretrain_rate_decay or isinstance(pretrain_lr, float)
    assert train_rate_decay or isinstance(finetune_lr, float)
    assert not (use_rate_schedule and train_rate_decay), (
        'Error: cannot use the adaptive rate schedule and a linear rate '
        'schedule together')

    # cast the number of epochs to int
    pretraining_epochs = int(pretraining_epochs)
    training_epochs = int(training_epochs)

    #check for linear rate schedules
    if pretrain_rate_decay:
        linear_pretrain_rates = True
        pretrain_rates = numpy.linspace(pretrain_lr[0], pretrain_lr[1],
                                        pretraining_epochs)
    else:
        pretrain_rates = [pretrain_lr] * pretraining_epochs

    if train_rate_decay:
        linear_train_rates = True
        train_rates = numpy.linspace(finetune_lr[0], finetune_lr[1],
                                     training_epochs)
    else:
        train_rates = [finetune_lr] * training_epochs

    #create a log object
    logger = Logger(log_file)

    #log run params
    if restart: logger.log("Restarting run using old best_model")
    logger.log("Running SAE Experiment...")
    logger.add_newline()
    logger.log("Runtime params:")
    logger.log("pretrain_lr=%s" % str(pretrain_lr))
    logger.log("pretraining_epochs=%d" % pretraining_epochs)
    logger.log("finetune_lr=%s" % str(finetune_lr))
    logger.log("training_epochs=%d" % training_epochs)
    logger.log("L1_reg=%f" % L1_reg)
    logger.log("L2_reg=%f" % L2_reg)
    logger.log("dataset=%s" % dataset)
    logger.log("split_proportions=%s" % str(split_proportions))
    logger.log("hidden_layers_sizes=%s" % str(hidden_layers_sizes))
    logger.log("corruption_levels=%s" % str(corruption_levels))
    logger.log("batch_size=%d" % batch_size)
    logger.log("use_rate_schedule=%s" % use_rate_schedule)
    logger.log("load_pretrained_weights=%s" % load_pretrained_weights)
    logger.add_newline()

    datasets = load_data(dataset, split_proportions, logger)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size

    # numpy random generator
    numpy_rng = numpy.random.RandomState(89677)
    logger.log('... building the model')
    # construct the stacked denoising autoencoder class
    # the labels were cast to int32, so recover the underlying shared variable
    # from the cast op in order to read its values
    shared_train_set_y = train_set_y.owner.inputs[0]
    # load an existing model when requested; otherwise build a fresh SdA
    if restart:
        logger.log("loading model from best_model.pkl")
        sda = cPickle.load(open('best_model.pkl', 'r'))
    elif load_pretrained_weights:
        logger.log("loading model from pretrained_model.pkl")
        sda = cPickle.load(open('pretrained_model.pkl', 'r'))
    else:
        sda = SdA(numpy_rng=numpy_rng,
                  n_ins=train_set_x.get_value(borrow=True).shape[1],
                  hidden_layers_sizes=hidden_layers_sizes,
                  n_outs=numpy.unique(
                      shared_train_set_y.get_value(borrow=True)).size,
                  L1_reg=L1_reg,
                  L2_reg=L2_reg)

    #create dictionary to store training stat accumulation arrays for easy pickling
    train_stat_dict = {}

    #########################
    # PRETRAINING THE MODEL #
    #########################
    # average pretraining cost per layer at each epoch
    pretraining_costs = [[] for i in xrange(sda.n_layers)]
    train_stat_dict['pretraining_costs'] = pretraining_costs
    if not (restart or load_pretrained_weights or SKIP_PRETRAINING):
        logger.log('... getting the pretraining functions')
        pretraining_fns = sda.pretraining_functions(train_set_x=train_set_x,
                                                    batch_size=batch_size)

        logger.log('... pre-training the model')
        start_time = timeit.default_timer()

        ## Pre-train layer-wise
        for i in xrange(sda.n_layers):
            # go through pretraining epochs
            for epoch in xrange(pretraining_epochs):
                # go through the training set
                c = []
                for batch_index in xrange(n_train_batches):
                    c.append(pretraining_fns[i](
                        index=batch_index,
                        corruption=corruption_levels[i],
                        lr=pretrain_rates[epoch]))
                mean_cost = numpy.mean(c)
                logger.log('Pre-training layer %i, epoch %d, cost %f' %
                           (i, epoch, mean_cost))
                pretraining_costs[i].append(mean_cost)

        end_time = timeit.default_timer()

        #save the pretrained model
        with open('pretrained_model.pkl', 'w') as f:
            cPickle.dump(sda, f)

        logger.log('The pretraining code for file ' +
                   os.path.split(__file__)[1] + ' ran for %.2fm' %
                   ((end_time - start_time) / 60.))
    else:
        logger.log("skipping pretraining")

    ########################
    # FINETUNING THE MODEL #
    ########################

    # get the training, validation and testing function for the model
    logger.log('... getting the finetuning functions')
    (train_fn, validate_model_NLL, validate_model_zero_one,
     test_model) = sda.build_finetune_functions(datasets=datasets,
                                                batch_size=batch_size)

    logger.log('... fine-tuning the model')
    # early-stopping parameters
    patience = 100 * n_train_batches  # look at this many examples regardless
    patience_increase = 2.  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
    # go through this many minibatches before checking the network on the
    # validation set; in this case we check every epoch

    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = timeit.default_timer()

    done_looping = False
    epoch = 0
    iter = 0  # global minibatch iteration
    minibatch_avg_NLL = []  # accumulates the NLL cost over minibatches
    training_NLL = []  # average training NLL cost at each validation check
    validation_NLL = []  # average validation NLL cost at each validation check
    validation_zero_one = []  # zero-one loss at each check (% misclassified)
    train_stat_dict['training_NLL'] = training_NLL
    train_stat_dict['validation_NLL'] = validation_NLL
    train_stat_dict['validation_zero_one'] = validation_zero_one
    while (epoch < training_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            iter += 1
            minibatch_avg_NLL.append(
                train_fn(minibatch_index, lr=train_rates[epoch - 1]))

            if iter % validation_frequency == 0:
                """validation zero one loss """
                validation_zero_one_losses = validate_model_zero_one()
                validation_zero_one.append(
                    numpy.mean(validation_zero_one_losses))

                #validation NLL cost
                validation_NLL_losses = validate_model_NLL()
                validation_NLL.append(numpy.mean(validation_NLL_losses))

                #training NLL cost
                training_NLL.append(numpy.mean(minibatch_avg_NLL))
                minibatch_avg_NLL = []  #reset the NLL accumulator

                logger.log('epoch %i, minibatch %i/%i:' %
                           (epoch, minibatch_index + 1, n_train_batches))
                logger.log('\ttraining NLL loss: %f ' % training_NLL[-1])
                logger.log('\tvalidation NLL loss: %f ' % validation_NLL[-1])
                logger.log('\tvalidation zero one loss: %f %%' %
                           (validation_zero_one[-1] * 100.))

                # if we got the best validation score until now
                if validation_zero_one[-1] < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if (validation_zero_one[-1] <
                            best_validation_loss * improvement_threshold):
                        patience = max(patience, iter * patience_increase)
                    else:
                        print "improvemnt not good enough: %f" % (
                            validation_zero_one[-1] / best_validation_loss)

                    # save best validation score and iteration number
                    best_validation_loss = validation_zero_one[-1]
                    best_iter = iter

                    # test it on the test set
                    test_zero_one_losses = test_model()
                    test_score = numpy.mean(test_zero_one_losses)
                    print '\t\ttest zero one loss of best model %f %%' % (
                        test_score * 100.)

                    #save the best model
                    with open('best_model.pkl', 'w') as f:
                        cPickle.dump(sda, f)

            if patience <= iter:
                # early stopping on patience is disabled in this run
                pass
                # done_looping = True
                # break
        # adaptive schedule: every 100 epochs, halve the learning rate if the
        # validation NLL improved by less than 1e-4 over the last 100 epochs
        if use_rate_schedule and epoch % 100 == 0:
            if validation_NLL[epoch - 100] - validation_NLL[epoch - 1] < 1e-4:
                finetune_lr = max(finetune_lr / 2., 1e-6)
                train_rates = [finetune_lr] * training_epochs
                logger.log("Reducing learning rate. new rate: %f" %
                           finetune_lr)

    #save train_stat_dict to a .mat file
    sio.savemat('train_stats.mat', train_stat_dict)
    #with open('train_stat_dict.pkl','w') as f:
    #  cPickle.dump(train_stat_dict,f)
    end_time = timeit.default_timer()
    logger.log(('Optimization complete with best validation score of %f %%, '
                'on iteration %i, '
                'with test performance %f %%') %
               (best_validation_loss * 100., best_iter, test_score * 100.))
    logger.log('The training code for file ' + os.path.split(__file__)[1] +
               ' ran for %.2fm' % ((end_time - start_time) / 60.))

    logger.close()
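For reference, a hypothetical call to run_SAE_experiment is sketched below; the parameter values are illustrative, not taken from the original runs. Passing a (start, end) tuple for pretrain_lr requests a linear pretraining schedule, while finetune_lr stays a plain float, since the asserts above forbid combining use_rate_schedule with a linear fine-tuning schedule.

# Hypothetical invocation (illustrative values only)
run_SAE_experiment(pretrain_lr=(0.1, 0.01),
                   pretraining_epochs=3300,
                   finetune_lr=0.1,
                   use_rate_schedule=True,
                   dataset='KSC.pkl',
                   hidden_layers_sizes=[20],
                   corruption_levels=[0.],
                   log_file='log')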
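Both run_SAE_experiment above and run_deepNet in Example No. 3 below report progress through a Logger object exposing log, add_newline, and close. That class is not part of these excerpts; a minimal stand-in, assuming it simply appends each message to a text file and echoes it to stdout, could look like this:

import sys

class Logger(object):
    """Minimal stand-in for the project's Logger (assumed interface)."""

    def __init__(self, log_file):
        self.f = open(log_file, 'a')

    def log(self, msg):
        # append the message to the log file and echo it to stdout
        self.f.write(msg + '\n')
        self.f.flush()
        sys.stdout.write(msg + '\n')

    def add_newline(self):
        self.log('')

    def close(self):
        self.f.close()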
Example No. 3
def run_deepNet(dataset='KSC.pkl',
                split_proportions=[6, 2, 2],
                hidden_sizes=[20],
                hidden_nonlinearity=lasagne.nonlinearities.rectify,
                dropout_probs=[0.5],
                learning_rate=(0.1, 0.1),  # (start, end) of a linear schedule
                momentum=(0.9, 0.9),  # (start, end) of a linear schedule
                num_epochs=int(5e4),
                minibatch_size=64,
                log_file='log'):

    #create a log object
    logger = Logger(log_file)

    #log run params
    logger.log("Running run_deepNet Experiment...")
    logger.add_newline()
    logger.log("Runtime params:")
    logger.log("dataset=%s" % dataset)
    logger.log("split_proportions=%s" % str(split_proportions))
    logger.log("hidden_sizes=%s" % str(hidden_sizes))
    logger.log("hidden_nonlinearity=%s" % str(hidden_nonlinearity))
    logger.log("dropout_probs=%s" % str(dropout_probs))
    logger.log("learning_rate=%s" % str(learning_rate))
    logger.log("momentum=%s" % str(momentum))
    logger.log("num_epochs=%d" % num_epochs)
    logger.log("minibatch_size=%d" % minibatch_size)

    #Load the data
    train_set, val_set, test_set = load_data(dataset,
                                             split_proportions,
                                             logger,
                                             shared=False)
    x_train, y_train = train_set
    x_val, y_val = val_set
    x_test, y_test = test_set
    #normalize data to zero mean unit variance
    x_mean = np.mean(x_train)
    x_std = np.std(x_train)
    x_train = (x_train - x_mean) / x_std
    x_val = (x_val - x_mean) / x_std
    x_test = (x_test - x_mean) / x_std

    #prepare theano variables for inputs and targets
    input_var = T.matrix('inputs')
    target_var = T.ivector('targets')
    #build the model
    logger.log('... building the model')
    input_size = x_train.shape[1]
    output_size = np.unique(y_train).size
    # this run resumes from a previously saved model instead of building a
    # fresh network via build_network(input_var, input_size, hidden_sizes,
    #                                 hidden_nonlinearity, dropout_probs,
    #                                 output_size)
    net = cPickle.load(open('best_model.pkl', 'r'))
    layers = lasagne.layers.get_all_layers(net)
    input_var = layers[0].input_var
    #create loss expression for training
    logger.log('... building expressions and compiling train functions')
    prediction = lasagne.layers.get_output(net)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean()
    #create update expressions for training
    params = lasagne.layers.get_all_params(net, trainable=True)
    #linearly decay learning rate
    learning_rate = np.linspace(learning_rate[0], learning_rate[1], num_epochs)
    lr = theano.shared(np.array(learning_rate[0], dtype=theano.config.floatX))
    #linearly grow momentum
    momentum = np.linspace(momentum[0], momentum[1], num_epochs)
    mom = theano.shared(np.array(momentum[0], dtype=theano.config.floatX))
    updates = lasagne.updates.nesterov_momentum(loss,
                                                params,
                                                learning_rate=lr,
                                                momentum=mom)
    #create loss expression for validation/testing
    test_prediction = lasagne.layers.get_output(net, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(
        test_prediction, target_var)
    test_loss = test_loss.mean()
    #create an expression for the classification accuracy
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                      dtype=theano.config.floatX)
    #compile the training function
    train_fn = theano.function([input_var, target_var], loss, updates=updates)
    #compile a validation function for the validation loss and accuracy
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc])
    #train the model
    logger.log('... training the model')
    start_time = timeit.default_timer()
    best_validation_loss = np.inf
    training_NLL = []  # average training NLL cost at each epoch
    validation_NLL = []  # average validation NLL cost at each epoch
    validation_zero_one = []  # zero-one loss at each epoch (% misclassified)
    train_stat_dict = {}
    train_stat_dict['training_NLL'] = training_NLL
    train_stat_dict['validation_NLL'] = validation_NLL
    train_stat_dict['validation_zero_one'] = validation_zero_one

    for epoch in xrange(num_epochs):
        #do a pass over the training data
        lr.set_value(learning_rate[epoch])
        mom.set_value(momentum[epoch])
        train_err = 0
        train_batches = 0
        for batch in iterate_minibatches(x_train,
                                         y_train,
                                         minibatch_size,
                                         shuffle=True):
            inputs, targets = batch
            train_err += train_fn(inputs, targets)
            train_batches += 1
        #do a pass over the validation data
        val_err = 0
        val_acc = 0
        val_batches = 0
        for batch in iterate_minibatches(x_val,
                                         y_val,
                                         minibatch_size,
                                         shuffle=False):
            inputs, targets = batch
            err, acc = val_fn(inputs, targets)
            val_err += err
            val_acc += acc
            val_batches += 1
        #record results
        training_NLL.append(train_err / train_batches)
        validation_NLL.append(val_err / val_batches)
        validation_zero_one.append(1 - (val_acc / val_batches))
        logger.log('epoch %i:' % (epoch))
        logger.log('\ttraining NLL loss: %f ' % training_NLL[-1])
        logger.log('\tvalidation NLL loss: %f ' % validation_NLL[-1])
        logger.log('\tvalidation zero one loss: %f %%' %
                   (validation_zero_one[-1] * 100.))
        # if we got the best validation score until now
        if validation_zero_one[-1] < best_validation_loss:
            # save best validation score and iteration number
            best_validation_loss = validation_zero_one[-1]
            best_epoch = epoch
            #save the best model
            with open('best_model.pkl', 'w') as f:
                cPickle.dump(net, f)
        # update the best model in a sliding window looking back 50 epochs
        #window_start = max(len(validation_zero_one)-50,0)
        #window_end = len(validation_zero_one)
        #if validation_zero_one[-1] == min(validation_zero_one[window_start:window_end]):
        ## save best validation score and iteration number
        #best_window_validation_loss = validation_zero_one[-1]
        #best_window_epoch = epoch
        ##save the best model
        #with open('best_window_model.pkl', 'w') as f:
        #cPickle.dump(net,f)
        if (epoch - best_epoch) > 1e4:
            logger.log("Early stopping...")
            break

    ######post training#######

    #save train_stat_dict to a .mat file
    sio.savemat('train_stats.mat', train_stat_dict)
    #with open('train_stat_dict.pkl','w') as f:
    #  cPickle.dump(train_stat_dict,f)

    # After training, we compute and print the test error:
    #load best model
    logger.log("loading model from best_model.pkl")
    net = cPickle.load(open('best_model.pkl', 'r'))
    #logger.log("loading model from best_window_model.pkl")
    #window_net = cPickle.load(open('best_window_model.pkl','r'))
    test_err, test_acc = predict(net, x_test, y_test)
    test_score = 1 - test_acc
    #test_err, test_acc = predict(window_net,x_test,y_test)
    #window_test_score = 1 - test_acc
    end_time = timeit.default_timer()
    logger.log(('Optimization complete with best validation score of %f %%, '
                'on epoch %i, '
                'with test performance %f %%') %
               (best_validation_loss * 100., best_epoch, test_score * 100.))
    logger.log('The training code for file ' + os.path.split(__file__)[1] +
               ' ran for %.2fm' % ((end_time - start_time) / 60.))

    logger.close()
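run_deepNet loops over the data with an iterate_minibatches helper that is not shown in the excerpt. A minimal sketch in the spirit of the Lasagne tutorial helper, assuming it yields (inputs, targets) slices of size minibatch_size and optionally reshuffles every epoch, might be:

import numpy as np

def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    # yield (inputs, targets) minibatches, optionally in shuffled order;
    # a trailing batch smaller than batchsize is dropped
    assert len(inputs) == len(targets)
    indices = np.arange(len(inputs))
    if shuffle:
        np.random.shuffle(indices)
    for start in range(0, len(inputs) - batchsize + 1, batchsize):
        excerpt = indices[start:start + batchsize]
        yield inputs[excerpt], targets[excerpt]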