def test_DBN(finetune_lr, pretraining_epochs, pretrain_lr, cdk, usepersistent, training_epochs, L1_reg, L2_reg, hidden_layers_sizes, dataset, batch_size, output_folder, shuffle, scaling, dropout, first_layer, dumppath): """ Demonstrates how to train and test a Deep Belief Network. :type finetune_lr: float :param finetune_lr: learning rate used in the finetune stage :type pretraining_epochs: int :param pretraining_epochs: number of epoch to do pretraining :type pretrain_lr: float :param pretrain_lr: learning rate to be used during pre-training :type cdk: int :param cdk: number of Gibbs steps in CD/PCD :type training_epochs: int :param training_epochs: maximal number of iterations ot run the optimizer :type dataset: string :param dataset: path the the pickled dataset :type batch_size: int :param batch_size: the size of a minibatch """ print locals() datasets = loadmat(dataset=dataset, shuffle=shuffle, datasel=datasel, scaling=scaling, robust=robust) train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size print "%d training examples" % train_set_x.get_value(borrow=True).shape[0] print "%d feature dimensions" % train_set_x.get_value(borrow=True).shape[1] # numpy random generator numpy_rng = numpy.random.RandomState(123) print '... 
building the model' # construct the Deep Belief Network nclass = max(train_set_y.eval()) + 1 dbn = DBN(numpy_rng=numpy_rng, n_ins=train_set_x.get_value(borrow=True).shape[1], hidden_layers_sizes=hidden_layers_sizes, n_outs=nclass, L1_reg=L1_reg, L2_reg=L2_reg, first_layer=first_layer) print 'n_ins:%d' % train_set_x.get_value(borrow=True).shape[1] print 'n_outs:%d' % nclass # SP contains an ordered list of (pos), ordered by chord class number [0,ydim-1] SP = balanced_seg.balanced(nclass, train_set_y) # getting pre-training and fine-tuning functions # save images of the weights(receptive fields) in this output folder # if not os.path.isdir(output_folder): # os.makedirs(output_folder) # os.chdir(output_folder) print '... getting the pretraining functions' pretraining_fns = dbn.pretraining_functions(train_set_x=train_set_x, batch_size=batch_size, cdk=cdk, usepersistent=usepersistent) # get the training, validation and testing function for the model print '... getting the finetuning functions' train_fn, train_model, validate_model, test_model = dbn.build_finetune_functions( datasets=datasets, batch_size=batch_size, learning_rate=finetune_lr) trng = MRG_RandomStreams(1234) use_noise = theano.shared(numpy.asarray(0., dtype=theano.config.floatX)) if dropout: # dbn.x = dropout_layer(use_noise, dbn.x, trng, 0.8) for i in range(dbn.n_layers): dbn.sigmoid_layers[i].output = dropout_layer( use_noise, dbn.sigmoid_layers[i].output, trng, 0.5) # start-snippet-2 ######################### # PRETRAINING THE MODEL # ######################### print '... pre-training the model' plotting_time = 0. start_time = timeit.default_timer() ## Pre-train layer-wise for i in xrange(dbn.n_layers): # go through pretraining epochs for epoch in xrange(pretraining_epochs): if pretrain_dropout: use_noise.set_value(1.) 
# use dropout at pre-training # go through the training set c = [] for batch_index in xrange(n_train_batches): # FIXME: n_train_batches is a fake item bc_idx = balanced_seg.get_bc_idx(SP, nclass) c.append(pretraining_fns[i](bc_idx=bc_idx, lr=pretrain_lr)) print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch), print numpy.mean(c) ''' for j in range(dbn.n_layers): if j == 0: # Plot filters after each training epoch plotting_start = timeit.default_timer() # Construct image from the weight matrix this_layer = dbn.rbm_layers[j] this_field = this_layer.W.get_value(borrow=True).T print "field shape (%d,%d)"%this_field.shape image = Image.fromarray( tile_raster_images( X=this_field[0:100], # take only the first 100 fields (100 * n_visible) #the img_shape and tile_shape depends on n_visible and n_hidden of this_layer # if n_visible = 144 (12,12), if n_visible = 1512 (36,42) img_shape=(12, 12), tile_shape=(10, 10), tile_spacing=(1, 1) ) ) image.save('filters_at_epoch_%i.png' % epoch) plotting_stop = timeit.default_timer() plotting_time += (plotting_stop - plotting_start) ''' end_time = timeit.default_timer() # end-snippet-2 print >> sys.stderr, ('The pretraining code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)) ######################## # FINETUNING THE MODEL # ######################## print '... finetuning the model' # early-stopping parameters patience = 10 * n_train_batches # look as this many examples regardless patience_increase = 2. # wait this much longer when a new best is # found improvement_threshold = 0.999 # a relative improvement of this much is # considered significant validation_frequency = min(n_train_batches, patience / 2) # go through this many # minibatches before checking the network # on the validation set; in this case we # check every epoch best_validation_loss = numpy.inf test_score = 0. 
start_time = timeit.default_timer() done_looping = False epoch = 0 # while (epoch < training_epochs) and (not done_looping): while (epoch < training_epochs): if earlystop and done_looping: print 'early-stopping' break epoch = epoch + 1 for minibatch_index in xrange(n_train_batches): use_noise.set_value(1.) # use dropout at training time # FIXME: n_train_batches is a fake item bc_idx = balanced_seg.get_bc_idx(SP, nclass) minibatch_avg_cost = train_fn(bc_idx) iter = (epoch - 1) * n_train_batches + minibatch_index if (iter + 1) % validation_frequency == 0: use_noise.set_value(0.) # stop dropout at validation/test time validation_losses = validate_model() training_losses = train_model() this_validation_loss = numpy.mean(validation_losses) this_training_loss = numpy.mean(training_losses) # also monitor the training losses print('epoch %i, minibatch %i/%i, training error %f %%' % (epoch, minibatch_index + 1, n_train_batches, this_training_loss * 100.)) print('epoch %i, minibatch %i/%i, validation error %f %%' % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.)) # if we got the best validation score until now if this_validation_loss < best_validation_loss: #improve patience if loss improvement is good enough if (this_validation_loss < best_validation_loss * improvement_threshold): patience = max(patience, iter * patience_increase) with open(dumppath, "wb") as f: cPickle.dump(dbn.params, f) # save best validation score and iteration number best_validation_loss = this_validation_loss best_iter = iter ''' # test it on the test set test_losses = test_model() test_score = numpy.mean(test_losses) print((' epoch %i, minibatch %i/%i, test error of ' 'best model %f %%') % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.)) ''' if patience <= iter: done_looping = True if earlystop: break end_time = timeit.default_timer() print(('Optimization complete with best validation score of %f %%, ' 'obtained at iteration %i, ' 'with test performance %f 
%%') % (best_validation_loss * 100., best_iter + 1, test_score * 100.)) print >> sys.stderr, ('The fine tuning code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.))
def train_lstm( # word embedding in ACE's context can be regarded as the feature vector size of each ns frame dim_proj=None, # word embeding dimension and LSTM number of hidden units. xdim=None, ydim=None, format=None, patience=10, # Number of epoch to wait before early stop if no progress max_epochs=500, # The maximum number of epoch to run dispFreq=10, # Display to stdout the training progress every N updates decay_c=0., # Weight decay for the classifier applied to the U weights. lrate=0.001, # Learning rate for sgd (not used for adadelta and rmsprop) # n_words=10000, # Vocabulary size optimizer=adadelta, # sgd, adadelta and rmsprop available, sgd very hard to use, not recommanded (probably need momentum and decaying learning rate). encoder='lstm', # TODO: can be removed must be lstm. dumppath='blstm_model.npz', # The best model will be saved there validFreq=400, # Compute the validation error after this number of update. saveFreq=1000, # Save the parameters after every saveFreq updates maxlen=None, # Sequence longer then this get ignored batch_size=100, # The batch size during training. valid_batch_size=100, # The batch size used for validation/test set. dataset=None, # Parameter for extra option noise_std=0., use_dropout=True, # if False slightly faster, but worst test error # This frequently need a bigger model. reload_model=None, # Path to a saved model we want to start from. test_size=-1, # If >0, we keep only this number of test example. scaling=1): # Model options model_options = locals().copy() print "model options", model_options #load_data, prepare_data = get_dataset(dataset) print 'Loading data' train, valid, test = load_data_varlen(dataset=dataset, valid_portion=0.1, test_portion=0.1, maxlen=None, scaling=scaling, robust=0, format=format, h5py=1) print 'data loaded' ''' if test_size > 0: # The test set is sorted by size, but we want to keep random # size example. So we must select a random selection of the # examples. 
idx = numpy.arange(len(test[0])) numpy.random.shuffle(idx) idx = idx[:test_size] test = ([test[0][n] for n in idx], [test[1][n] for n in idx]) ''' ydim = numpy.max(train[1]) + 1 # ydim = numpy.max(train[1]) print 'ydim = %d' % ydim model_options['ydim'] = ydim model_options['xdim'] = xdim model_options['dim_proj'] = dim_proj print 'Building model' # This create the initial parameters as numpy ndarrays. # Dict name (string) -> numpy ndarray params = init_params(model_options) if reload_model: load_params('lstm_model.npz', params) # This create Theano Shared Variable from the parameters. # Dict name (string) -> Theano Tensor Shared Variable # params and tparams have different copy of the weights. tparams = init_tparams(params) # use_noise is for dropout (use_noise, x, mask, oh_mask, y, f_pred_prob, f_pred, cost) = build_model(tparams, model_options) if decay_c > 0.: decay_c = theano.shared(numpy_floatX(decay_c), name='decay_c') weight_decay = 0. weight_decay += (tparams['U']**2).sum() weight_decay *= decay_c cost += weight_decay f_cost = theano.function([x, mask, oh_mask, y], cost, name='f_cost') grads = T.grad(cost, wrt=tparams.values()) f_grad = theano.function([x, mask, oh_mask, y], grads, name='f_grad') lr = T.scalar(name='lr') f_grad_shared, f_update = optimizer(lr, tparams, grads, x, mask, oh_mask, y, cost) print 'Optimization' kf_valid = get_minibatches_idx(len(valid[0]), valid_batch_size) kf_test = get_minibatches_idx(len(test[0]), valid_batch_size) print "%d train examples" % len(train[0]) print "%d valid examples" % len(valid[0]) print "%d test examples" % len(test[0]) history_errs = [] best_p = None bad_count = 0 if validFreq == -1: validFreq = len(train[0]) / batch_size if saveFreq == -1: saveFreq = len(train[0]) / batch_size uidx = 0 # the number of update done estop = False # early stop start_time = time.time() # SP contains an ordered list of (pos), ordered by chord class number [0,ydim-1] SP = balanced_seg.balanced_noeval(ydim, train[1]) try: for eidx 
in xrange(max_epochs): n_samples = 0 # Get new shuffled index for the training set. kf = get_minibatches_idx(len(train[0]), batch_size, shuffle=True) for _, train_index in kf: uidx += 1 use_noise.set_value(1.) # FIXME: train_index is not used, kf is not used bc_idx = balanced_seg.get_bc_idx(SP, ydim) # Select the random examples for this minibatch y = [train[1][t] for t in bc_idx] x = [train[0][t] for t in bc_idx] # Get the data in numpy.ndarray format # This swap the axis! # Return something of shape (minibatch maxlen, n samples) x, mask, oh_mask, y = prepare_data(x, y, xdim=xdim, maxlen=maxlen) n_samples += x.shape[1] cost = f_grad_shared(x, mask, oh_mask, y) f_update(lrate) if numpy.isnan(cost) or numpy.isinf(cost): print 'NaN detected' return 1., 1., 1. if numpy.mod(uidx, dispFreq) == 0: print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost if dumppath and numpy.mod(uidx, saveFreq) == 0: print 'Saving...', # save the best param set to date (best_p) if best_p is not None: params = best_p else: params = unzip(tparams) numpy.savez(dumppath, history_errs=history_errs, **params) # pkl.dump(model_options, open('%s.pkl' % dumppath, 'wb'), -1) print 'Done' if numpy.mod(uidx, validFreq) == 0: use_noise.set_value(0.) train_err = pred_error(f_pred, prepare_data, train, kf) valid_err = pred_error(f_pred, prepare_data, valid, kf_valid) # test_err = pred_error(f_pred, prepare_data, test, kf_test) test_err = 1 history_errs.append([valid_err, test_err]) # save param only if the validation error is less than the history minimum if (uidx == 0 or valid_err <= numpy.array(history_errs)[:, 0].min()): best_p = unzip(tparams) bad_counter = 0 print('Train ', train_err, 'Valid ', valid_err, 'Test ', test_err) # early stopping if (len(history_errs) > patience and valid_err >= numpy.array(history_errs)[:-patience, 0].min()): bad_counter += 1 if bad_counter > patience: print 'Early Stop!' 
estop = True break print 'Seen %d samples' % n_samples if estop: break except KeyboardInterrupt: print "Training interupted" end_time = time.time() if best_p is not None: zipp(best_p, tparams) else: best_p = unzip(tparams) use_noise.set_value(0.) kf_train_sorted = get_minibatches_idx(len(train[0]), batch_size) train_err = pred_error(f_pred, prepare_data, train, kf_train_sorted) valid_err = pred_error(f_pred, prepare_data, valid, kf_valid) # test_err = pred_error(f_pred, prepare_data, test, kf_test) test_err = 1 print 'Train ', train_err, 'Valid ', valid_err, 'Test ', test_err if dumppath: numpy.savez(dumppath, train_err=train_err, valid_err=valid_err, test_err=test_err, history_errs=history_errs, **best_p) print 'The code run for %d epochs, with %f sec/epochs' % ( (eidx + 1), (end_time - start_time) / (1. * (eidx + 1))) print >> sys.stderr, ('Training took %.1fs' % (end_time - start_time)) return train_err, valid_err, test_err
def train_lstm( # word embedding in ACE's context can be regarded as the feature vector size of each ns frame dim_proj=None, # word embeding dimension and LSTM number of hidden units. xdim=None, ydim=None, format=None, patience=10, # Number of epoch to wait before early stop if no progress max_epochs=500, # The maximum number of epoch to run dispFreq=10, # Display to stdout the training progress every N updates decay_c=0., # Weight decay for the classifier applied to the U weights. lrate=0.001, # Learning rate for sgd (not used for adadelta and rmsprop) # n_words=10000, # Vocabulary size optimizer=adadelta, # sgd, adadelta and rmsprop available, sgd very hard to use, not recommanded (probably need momentum and decaying learning rate). encoder='lstm', # TODO: can be removed must be lstm. dumppath='blstm_model.npz', # The best model will be saved there validFreq=400, # Compute the validation error after this number of update. saveFreq=1000, # Save the parameters after every saveFreq updates maxlen=None, # Sequence longer then this get ignored batch_size=100, # The batch size during training. valid_batch_size=100, # The batch size used for validation/test set. dataset=None, # Parameter for extra option noise_std=0., use_dropout=True, # if False slightly faster, but worst test error # This frequently need a bigger model. reload_model=None, # Path to a saved model we want to start from. test_size=-1, # If >0, we keep only this number of test example. scaling=1 ): # Model options model_options = locals().copy() print "model options", model_options #load_data, prepare_data = get_dataset(dataset) print 'Loading data' train, valid, test = load_data_varlen(dataset=dataset, valid_portion=0.1, test_portion=0.1, maxlen=None, scaling=scaling, robust=0, format=format, h5py=1) print 'data loaded' ''' if test_size > 0: # The test set is sorted by size, but we want to keep random # size example. So we must select a random selection of the # examples. 
idx = numpy.arange(len(test[0])) numpy.random.shuffle(idx) idx = idx[:test_size] test = ([test[0][n] for n in idx], [test[1][n] for n in idx]) ''' ydim = numpy.max(train[1]) + 1 # ydim = numpy.max(train[1]) print 'ydim = %d'%ydim model_options['ydim'] = ydim model_options['xdim'] = xdim model_options['dim_proj'] = dim_proj print 'Building model' # This create the initial parameters as numpy ndarrays. # Dict name (string) -> numpy ndarray params = init_params(model_options) if reload_model: load_params('lstm_model.npz', params) # This create Theano Shared Variable from the parameters. # Dict name (string) -> Theano Tensor Shared Variable # params and tparams have different copy of the weights. tparams = init_tparams(params) # use_noise is for dropout (use_noise, x, mask, oh_mask, y, f_pred_prob, f_pred, cost) = build_model(tparams, model_options) if decay_c > 0.: decay_c = theano.shared(numpy_floatX(decay_c), name='decay_c') weight_decay = 0. weight_decay += (tparams['U'] ** 2).sum() weight_decay *= decay_c cost += weight_decay f_cost = theano.function([x, mask, oh_mask, y], cost, name='f_cost') grads = T.grad(cost, wrt=tparams.values()) f_grad = theano.function([x, mask, oh_mask, y], grads, name='f_grad') lr = T.scalar(name='lr') f_grad_shared, f_update = optimizer(lr, tparams, grads, x, mask, oh_mask, y, cost) print 'Optimization' kf_valid = get_minibatches_idx(len(valid[0]), valid_batch_size) kf_test = get_minibatches_idx(len(test[0]), valid_batch_size) print "%d train examples" % len(train[0]) print "%d valid examples" % len(valid[0]) print "%d test examples" % len(test[0]) history_errs = [] best_p = None bad_count = 0 if validFreq == -1: validFreq = len(train[0]) / batch_size if saveFreq == -1: saveFreq = len(train[0]) / batch_size uidx = 0 # the number of update done estop = False # early stop start_time = time.time() # SP contains an ordered list of (pos), ordered by chord class number [0,ydim-1] SP = balanced_seg.balanced_noeval(ydim,train[1]) try: for eidx 
in xrange(max_epochs): n_samples = 0 # Get new shuffled index for the training set. kf = get_minibatches_idx(len(train[0]), batch_size, shuffle=True) for _, train_index in kf: uidx += 1 use_noise.set_value(1.) # FIXME: train_index is not used, kf is not used bc_idx = balanced_seg.get_bc_idx(SP,ydim) # Select the random examples for this minibatch y = [train[1][t] for t in bc_idx] x = [train[0][t] for t in bc_idx] # Get the data in numpy.ndarray format # This swap the axis! # Return something of shape (minibatch maxlen, n samples) x, mask, oh_mask, y = prepare_data(x, y, xdim=xdim, maxlen=maxlen) n_samples += x.shape[1] cost = f_grad_shared(x, mask, oh_mask, y) f_update(lrate) if numpy.isnan(cost) or numpy.isinf(cost): print 'NaN detected' return 1., 1., 1. if numpy.mod(uidx, dispFreq) == 0: print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost if dumppath and numpy.mod(uidx, saveFreq) == 0: print 'Saving...', # save the best param set to date (best_p) if best_p is not None: params = best_p else: params = unzip(tparams) numpy.savez(dumppath, history_errs=history_errs, **params) # pkl.dump(model_options, open('%s.pkl' % dumppath, 'wb'), -1) print 'Done' if numpy.mod(uidx, validFreq) == 0: use_noise.set_value(0.) train_err = pred_error(f_pred, prepare_data, train, kf) valid_err = pred_error(f_pred, prepare_data, valid, kf_valid) # test_err = pred_error(f_pred, prepare_data, test, kf_test) test_err = 1 history_errs.append([valid_err, test_err]) # save param only if the validation error is less than the history minimum if (uidx == 0 or valid_err <= numpy.array(history_errs)[:, 0].min()): best_p = unzip(tparams) bad_counter = 0 print ('Train ', train_err, 'Valid ', valid_err, 'Test ', test_err) # early stopping if (len(history_errs) > patience and valid_err >= numpy.array(history_errs)[:-patience, 0].min()): bad_counter += 1 if bad_counter > patience: print 'Early Stop!' 
estop = True break print 'Seen %d samples' % n_samples if estop: break except KeyboardInterrupt: print "Training interupted" end_time = time.time() if best_p is not None: zipp(best_p, tparams) else: best_p = unzip(tparams) use_noise.set_value(0.) kf_train_sorted = get_minibatches_idx(len(train[0]), batch_size) train_err = pred_error(f_pred, prepare_data, train, kf_train_sorted) valid_err = pred_error(f_pred, prepare_data, valid, kf_valid) # test_err = pred_error(f_pred, prepare_data, test, kf_test) test_err = 1 print 'Train ', train_err, 'Valid ', valid_err, 'Test ', test_err if dumppath: numpy.savez(dumppath, train_err=train_err, valid_err=valid_err, test_err=test_err, history_errs=history_errs, **best_p) print 'The code run for %d epochs, with %f sec/epochs' % ( (eidx + 1), (end_time - start_time) / (1. * (eidx + 1))) print >> sys.stderr, ('Training took %.1fs' % (end_time - start_time)) return train_err, valid_err, test_err
def test_DBN(finetune_lr, pretraining_epochs, pretrain_lr, cdk, usepersistent, training_epochs, L1_reg, L2_reg, hidden_layers_sizes, dataset, batch_size, output_folder, shuffle, scaling, dropout, first_layer, dumppath): """ Demonstrates how to train and test a Deep Belief Network. :type finetune_lr: float :param finetune_lr: learning rate used in the finetune stage :type pretraining_epochs: int :param pretraining_epochs: number of epoch to do pretraining :type pretrain_lr: float :param pretrain_lr: learning rate to be used during pre-training :type cdk: int :param cdk: number of Gibbs steps in CD/PCD :type training_epochs: int :param training_epochs: maximal number of iterations ot run the optimizer :type dataset: string :param dataset: path the the pickled dataset :type batch_size: int :param batch_size: the size of a minibatch """ print locals() datasets = loadmat(dataset=dataset, shuffle=shuffle, datasel=datasel, scaling=scaling, robust=robust) train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size print "%d training examples" % train_set_x.get_value(borrow=True).shape[0] print "%d feature dimensions" % train_set_x.get_value(borrow=True).shape[1] # numpy random generator numpy_rng = numpy.random.RandomState(123) print '... 
building the model' # construct the Deep Belief Network nclass = max(train_set_y.eval())+1 dbn = DBN(numpy_rng=numpy_rng, n_ins=train_set_x.get_value(borrow=True).shape[1], hidden_layers_sizes=hidden_layers_sizes, n_outs=nclass, L1_reg=L1_reg, L2_reg=L2_reg, first_layer=first_layer) print 'n_ins:%d'% train_set_x.get_value(borrow=True).shape[1] print 'n_outs:%d'% nclass # SP contains an ordered list of (pos), ordered by chord class number [0,ydim-1] SP = balanced_seg.balanced(nclass,train_set_y) # getting pre-training and fine-tuning functions # save images of the weights(receptive fields) in this output folder # if not os.path.isdir(output_folder): # os.makedirs(output_folder) # os.chdir(output_folder) print '... getting the pretraining functions' pretraining_fns = dbn.pretraining_functions(train_set_x=train_set_x, batch_size=batch_size, cdk=cdk, usepersistent=usepersistent) # get the training, validation and testing function for the model print '... getting the finetuning functions' train_fn, train_model, validate_model, test_model = dbn.build_finetune_functions( datasets=datasets, batch_size=batch_size, learning_rate=finetune_lr ) trng = MRG_RandomStreams(1234) use_noise = theano.shared(numpy.asarray(0., dtype=theano.config.floatX)) if dropout: # dbn.x = dropout_layer(use_noise, dbn.x, trng, 0.8) for i in range(dbn.n_layers): dbn.sigmoid_layers[i].output = dropout_layer(use_noise, dbn.sigmoid_layers[i].output, trng, 0.5) # start-snippet-2 ######################### # PRETRAINING THE MODEL # ######################### print '... pre-training the model' plotting_time = 0. start_time = timeit.default_timer() ## Pre-train layer-wise for i in xrange(dbn.n_layers): # go through pretraining epochs for epoch in xrange(pretraining_epochs): if pretrain_dropout: use_noise.set_value(1.) 
# use dropout at pre-training # go through the training set c = [] for batch_index in xrange(n_train_batches): c.append(pretraining_fns[i](index=batch_index, lr=pretrain_lr)) print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch), print numpy.mean(c) ''' for j in range(dbn.n_layers): if j == 0: # Plot filters after each training epoch plotting_start = timeit.default_timer() # Construct image from the weight matrix this_layer = dbn.rbm_layers[j] this_field = this_layer.W.get_value(borrow=True).T print "field shape (%d,%d)"%this_field.shape image = Image.fromarray( tile_raster_images( X=this_field[0:100], # take only the first 100 fields (100 * n_visible) #the img_shape and tile_shape depends on n_visible and n_hidden of this_layer # if n_visible = 144 (12,12), if n_visible = 1512 (36,42) img_shape=(12, 12), tile_shape=(10, 10), tile_spacing=(1, 1) ) ) image.save('filters_at_epoch_%i.png' % epoch) plotting_stop = timeit.default_timer() plotting_time += (plotting_stop - plotting_start) ''' end_time = timeit.default_timer() # end-snippet-2 print >> sys.stderr, ('The pretraining code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)) ######################## # FINETUNING THE MODEL # ######################## print '... finetuning the model' # early-stopping parameters patience = 10 * n_train_batches # look as this many examples regardless patience_increase = 2. # wait this much longer when a new best is # found improvement_threshold = 0.999 # a relative improvement of this much is # considered significant validation_frequency = min(n_train_batches, patience / 2) # go through this many # minibatches before checking the network # on the validation set; in this case we # check every epoch best_validation_loss = numpy.inf test_score = 0. 
start_time = timeit.default_timer() done_looping = False epoch = 0 # while (epoch < training_epochs) and (not done_looping): while (epoch < training_epochs): if earlystop and done_looping: print 'early-stopping' break epoch = epoch + 1 for minibatch_index in xrange(n_train_batches): use_noise.set_value(1.) # use dropout at training time # FIXME: n_train_batches is a fake item bc_idx = balanced_seg.get_bc_idx(SP,nclass) minibatch_avg_cost = train_fn(bc_idx) iter = (epoch - 1) * n_train_batches + minibatch_index if (iter + 1) % validation_frequency == 0: use_noise.set_value(0.) # stop dropout at validation/test time validation_losses = validate_model() training_losses = train_model() this_validation_loss = numpy.mean(validation_losses) this_training_loss = numpy.mean(training_losses) # also monitor the training losses print( 'epoch %i, minibatch %i/%i, training error %f %%' % ( epoch, minibatch_index + 1, n_train_batches, this_training_loss * 100. ) ) print( 'epoch %i, minibatch %i/%i, validation error %f %%' % ( epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100. 
) ) # if we got the best validation score until now if this_validation_loss < best_validation_loss: #improve patience if loss improvement is good enough if ( this_validation_loss < best_validation_loss * improvement_threshold ): patience = max(patience, iter * patience_increase) with open(dumppath, "wb") as f: cPickle.dump(dbn.params, f) # save best validation score and iteration number best_validation_loss = this_validation_loss best_iter = iter ''' # test it on the test set test_losses = test_model() test_score = numpy.mean(test_losses) print((' epoch %i, minibatch %i/%i, test error of ' 'best model %f %%') % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.)) ''' if patience <= iter: done_looping = True if earlystop: break end_time = timeit.default_timer() print( ( 'Optimization complete with best validation score of %f %%, ' 'obtained at iteration %i, ' 'with test performance %f %%' ) % (best_validation_loss * 100., best_iter + 1, test_score * 100.) ) print >> sys.stderr, ('The fine tuning code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.))
def test_mlp(learning_rate, L1_reg, L2_reg, n_epochs, hidden_layers_sizes, dataset, batch_size, datasel, shuffle, scaling, dropout, earlystop, dumppath): """ Demonstrate stochastic gradient descent optimization for a multilayer perceptron :type learning_rate: float :param learning_rate: learning rate used (factor for the stochastic gradient :type L1_reg: float :param L1_reg: L1-norm's weight when added to the cost (see regularization) :type L2_reg: float :param L2_reg: L2-norm's weight when added to the cost (see regularization) :type n_epochs: int :param n_epochs: maximal number of epochs to run the optimizer :type dataset: string :param dataset: the path of the dataset """ print locals() datasets = loadmat(dataset=dataset, shuffle=shuffle, datasel=datasel, scaling=scaling, robust=robust, h5py=1) train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size ###################### # BUILD ACTUAL MODEL # ###################### print '... 
building the model' # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch bcidx = T.ivector('bcidx') x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels rng = numpy.random.RandomState(1234) nclass = max(train_set_y.eval()) + 1 print "n_in = %d" % train_set_x.get_value(borrow=True).shape[1] print "n_out = %d" % nclass # construct the MLP class classifier = MLP(rng=rng, input=x, n_in=train_set_x.get_value(borrow=True).shape[1], hidden_layers_sizes=hidden_layers_sizes, n_out=nclass) # dropout the hidden layers trng = RandomStreams(1234) use_noise = theano.shared(numpy.asarray(0., dtype=theano.config.floatX)) if dropout: # classifier.input = dropout_layer(use_noise, classifier.input, trng, 0.8) for i in range(classifier.n_layers): classifier.hiddenlayers[i].output = dropout_layer( use_noise, classifier.hiddenlayers[i].output, trng, 0.5) # start-snippet-4 # the cost we minimize during training is the negative log likelihood of # the model plus the regularization terms (L1 and L2); cost is expressed # here symbolically cost = (classifier.negative_log_likelihood(y) + L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr) # end-snippet-4 # compiling a Theano function that computes the mistakes that are made # by the model on a minibatch test_model = theano.function( inputs=[index], outputs=classifier.errors(y), givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size], y: test_set_y[index * batch_size:(index + 1) * batch_size] }) validate_model = theano.function( inputs=[index], outputs=classifier.errors(y), givens={ x: valid_set_x[index * batch_size:(index + 1) * batch_size], y: valid_set_y[index * batch_size:(index + 1) * batch_size] }) train_score = theano.function( inputs=[index], outputs=classifier.errors(y), givens={ x: train_set_x[index * batch_size:(index + 1) * batch_size], y: train_set_y[index * batch_size:(index + 1) * batch_size] }) 
pred_probs = theano.function( inputs=[index], outputs=classifier.predprobs, givens={ x: train_set_x[index:1000], # y: train_set_y[index * batch_size:(index + 1) * batch_size] }) # start-snippet-5 # compute the gradient of cost with respect to theta (sotred in params) # the resulting gradients will be stored in a list gparams gparams = [T.grad(cost, param) for param in classifier.params] # specify how to update the parameters of the model as a list of # (variable, update expression) pairs # given two lists of the same length, A = [a1, a2, a3, a4] and # B = [b1, b2, b3, b4], zip generates a list C of same size, where each # element is a pair formed from the two lists : # C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)] updates = [(param, param - learning_rate * gparam) for param, gparam in zip(classifier.params, gparams)] # compiling a Theano function `train_model` that returns the cost, but # in the same time updates the parameter of the model based on the rules # defined in `updates` # train_model = theano.function( # inputs=[index], # outputs=cost, # updates=updates, # givens={ # x: train_set_x[index * batch_size: (index + 1) * batch_size], # y: train_set_y[index * batch_size: (index + 1) * batch_size] # } # ) train_model = theano.function(inputs=[bcidx], outputs=cost, updates=updates, givens={ x: train_set_x[bcidx], y: train_set_y[bcidx] }) # end-snippet-5 ############### # TRAIN MODEL # ############### print '... training' # early-stopping parameters patience = 100 * n_train_batches # look as this many examples regardless patience_increase = 2 # wait this much longer when a new best is # found improvement_threshold = 0.999 # a relative improvement of this much is # considered significant validation_frequency = min(n_train_batches, patience / 2) # go through this many # minibatche before checking the network # on the validation set; in this case we # check every epoch best_validation_loss = numpy.inf best_iter = 0 test_score = 0. 
start_time = timeit.default_timer() epoch = 0 done_looping = False # SP contains an ordered list of (pos), ordered by chord class number [0,ydim-1] SP = balanced_seg.balanced(nclass, train_set_y) while (epoch < n_epochs): if earlystop and done_looping: print 'early-stopping' break # while (epoch < n_epochs): epoch = epoch + 1 for minibatch_index in xrange(n_train_batches): use_noise.set_value(1.) # use dropout # FIXME: n_train_batches is a fake item # get balanced batch indices bc_idx = balanced_seg.get_bc_idx(SP, nclass) minibatch_avg_cost = train_model(bc_idx) # minibatch_avg_cost = train_model(minibatch_index) # iteration number iter = (epoch - 1) * n_train_batches + minibatch_index if (iter + 1) % validation_frequency == 0: # compute zero-one loss on validation set use_noise.set_value( 0.) # at validation/testing time, no dropout validation_losses = [ validate_model(i) for i in xrange(n_valid_batches) ] training_losses = [ train_score(i) for i in xrange(n_train_batches) ] this_validation_loss = numpy.mean(validation_losses) this_training_loss = numpy.mean(training_losses) probs = [pred_probs(i) for i in xrange(n_train_batches)] print('epoch %i, minibatch %i/%i, training error %f %%' % (epoch, minibatch_index + 1, n_train_batches, this_training_loss * 100.)) print('epoch %i, minibatch %i/%i, validation error %f %%' % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.)) # if we got the best validation score until now if this_validation_loss < best_validation_loss: #improve patience if loss improvement is good enough if (this_validation_loss < best_validation_loss * improvement_threshold): patience = max(patience, iter * patience_increase) # save model with open(dumppath, "wb") as f: cPickle.dump(classifier.params, f) best_validation_loss = this_validation_loss best_iter = iter ''' # test it on the test set test_losses = [test_model(i) for i in xrange(n_test_batches)] test_score = numpy.mean(test_losses) print((' epoch %i, minibatch %i/%i, 
test error of ' 'best model %f %%') % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.)) ''' if patience <= iter: done_looping = True if earlystop: break end_time = timeit.default_timer() print(('Optimization complete. Best validation score of %f %% ' 'obtained at iteration %i, with test performance %f %%') % (best_validation_loss * 100., best_iter + 1, test_score * 100.)) print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.))
def test_mlp(learning_rate, L1_reg, L2_reg, n_epochs, hidden_layers_sizes,
             dataset, batch_size, datasel, shuffle, scaling, dropout,
             earlystop, dumppath):
    """
    Train a multilayer perceptron by gradient descent and track its
    training / validation error, pickling the parameters of the best model.

    The data is loaded through ``loadmat``; each training step uses the
    example indices returned by ``balanced_seg.get_bc_idx``, while the
    error functions sweep the data in consecutive ``batch_size`` slices.

    NOTE(review): ``robust`` is forwarded to ``loadmat`` but is not a
    parameter of this function; it has to be resolvable from the enclosing
    (module) scope -- confirm against the caller.

    :type learning_rate: float
    :param learning_rate: step size applied to every gradient update

    :type L1_reg: float
    :param L1_reg: weight of ``classifier.L1`` in the training cost

    :type L2_reg: float
    :param L2_reg: weight of ``classifier.L2_sqr`` in the training cost

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :param hidden_layers_sizes: forwarded unchanged to ``MLP``

    :type dataset: string
    :param dataset: the path of the dataset, forwarded to ``loadmat``

    :type batch_size: int
    :param batch_size: size of the slices used by the error functions and
                       divisor used to derive the number of minibatches

    :param datasel: forwarded unchanged to ``loadmat``
    :param shuffle: forwarded unchanged to ``loadmat``
    :param scaling: forwarded unchanged to ``loadmat``

    :param dropout: when true, every hidden layer's ``output`` attribute is
                    replaced by ``dropout_layer(use_noise, output, trng, 0.5)``

    :param earlystop: when true, training stops once the patience budget is
                      used up; otherwise all ``n_epochs`` epochs are run

    :param dumppath: file that receives the pickled ``classifier.params``
                     each time a new best validation loss is reached
    """
    print(locals())

    datasets = loadmat(dataset=dataset, shuffle=shuffle, datasel=datasel,
                       scaling=scaling, robust=robust)
    (train_set_x, train_set_y), (valid_set_x, valid_set_y), \
        (test_set_x, test_set_y) = (datasets[0], datasets[1], datasets[2])

    # number of minibatches in the training, validation and test sets
    n_train_batches, n_valid_batches, n_test_batches = [
        shared_x.get_value(borrow=True).shape[0] / batch_size
        for shared_x in (train_set_x, valid_set_x, test_set_x)
    ]

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # symbolic inputs
    index = T.lscalar()          # minibatch number
    bcidx = T.ivector('bcidx')   # explicit example indices of one batch
    x = T.matrix('x')            # input data, one example per row
    y = T.ivector('y')           # integer labels, one per example

    rng = numpy.random.RandomState(1234)

    n_in = train_set_x.get_value(borrow=True).shape[1]
    nclass = max(train_set_y.eval()) + 1
    print("n_in = %d" % n_in)
    print("n_out = %d" % nclass)

    classifier = MLP(
        rng=rng,
        input=x,
        n_in=n_in,
        hidden_layers_sizes=hidden_layers_sizes,
        n_out=nclass
    )

    # ``use_noise`` is the shared switch handed to ``dropout_layer``: it is
    # set to 1 before every training step and to 0 before evaluation.
    trng = RandomStreams(1234)
    use_noise = theano.shared(numpy.asarray(0., dtype=theano.config.floatX))
    if dropout:
        for layer_idx in range(classifier.n_layers):
            hidden = classifier.hiddenlayers[layer_idx]
            hidden.output = dropout_layer(use_noise, hidden.output, trng, 0.5)

    # training objective: negative log likelihood plus L1 / L2 penalties
    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )

    # One error function per data split, each evaluating minibatch ``index``.
    # They are compiled in the order test, validation, training.
    error_fns = []
    for split_x, split_y in ((test_set_x, test_set_y),
                             (valid_set_x, valid_set_y),
                             (train_set_x, train_set_y)):
        error_fns.append(theano.function(
            inputs=[index],
            outputs=classifier.errors(y),
            givens={
                x: split_x[index * batch_size:(index + 1) * batch_size],
                y: split_y[index * batch_size:(index + 1) * batch_size]
            }
        ))
    test_model, validate_model, train_score = error_fns

    # NOTE(review): the slice below runs from ``index`` up to row 1000 rather
    # than over one minibatch -- confirm that this is intended.
    pred_probs = theano.function(
        inputs=[index],
        outputs=classifier.predprobs,
        givens={
            x: train_set_x[index:1000],
        }
    )

    # plain SGD: every parameter moves against its own gradient of the cost
    updates = [
        (param, param - learning_rate * T.grad(cost, param))
        for param in classifier.params
    ]

    # One training step: takes a vector of example indices, returns the cost
    # and applies ``updates``.
    train_model = theano.function(
        inputs=[bcidx],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[bcidx],
            y: train_set_y[bcidx]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    # early-stopping bookkeeping
    patience = 100 * n_train_batches   # minimum number of iterations to run
    patience_increase = 2              # factor applied on a significant gain
    improvement_threshold = 0.999      # relative gain counted as significant
    # validate after this many training steps (once per epoch here)
    validation_frequency = min(n_train_batches, patience / 2)

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    # SP contains an ordered list of (pos), ordered by class number
    SP = balanced_seg.balanced(nclass, train_set_y)

    epoch = 0
    done_looping = False
    while epoch < n_epochs:
        if earlystop and done_looping:
            print('early-stopping')
            break
        epoch += 1

        # n_train_batches only fixes the number of updates per epoch; the
        # examples themselves come from balanced_seg.get_bc_idx.
        for minibatch_index in xrange(n_train_batches):
            use_noise.set_value(1.)  # dropout switched on for training
            train_model(balanced_seg.get_bc_idx(SP, nclass))

            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:
                use_noise.set_value(0.)  # no dropout while evaluating
                this_validation_loss = numpy.mean(
                    [validate_model(i) for i in xrange(n_valid_batches)])
                this_training_loss = numpy.mean(
                    [train_score(i) for i in xrange(n_train_batches)])
                # evaluated but not used further in this function
                probs = [pred_probs(i) for i in xrange(n_train_batches)]

                progress = (epoch, minibatch_index + 1, n_train_batches)
                print('epoch %i, minibatch %i/%i, training error %f %%'
                      % (progress + (this_training_loss * 100.,)))
                print('epoch %i, minibatch %i/%i, validation error %f %%'
                      % (progress + (this_validation_loss * 100.,)))

                if this_validation_loss < best_validation_loss:
                    # extend the patience when the gain is large enough
                    significant = (
                        this_validation_loss
                        < best_validation_loss * improvement_threshold
                    )
                    if significant:
                        patience = max(patience,
                                       iteration * patience_increase)

                    # keep the parameters of the best model seen so far
                    with open(dumppath, "wb") as f:
                        cPickle.dump(classifier.params, f)

                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # Test-set evaluation is disabled, so ``test_score``
                    # keeps its initial value of 0 in the final report:
                    #   test_losses = [test_model(i)
                    #                  for i in xrange(n_test_batches)]
                    #   test_score = numpy.mean(test_losses)
                    #   print the test error of the best model

            if patience <= iteration:
                done_looping = True
                if earlystop:
                    break

    end_time = timeit.default_timer()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    sys.stderr.write('The code for file ' +
                     os.path.split(__file__)[1] +
                     ' ran for %.2fm' % ((end_time - start_time) / 60.) +
                     '\n')