def music_timestep(t, k, x_tm1, s_tm1, music):
    """Generate the notes for timestep ``t`` and add them to the song.

    ``self`` is not a parameter: it is taken from the enclosing scope, so
    this function is expected to be defined inside a method.

    Args:
        t: Index of the timestep being generated.
        k: Total number of timesteps; passed through unchanged.
        x_tm1: Notes of the previous timestep, used to seed the DBN sampler.
        s_tm1: Previous RNN state. When ``self.num_rnn_cells > 1`` it is
            indexed and only its last element conditions the DBN.
        music: Song tensor accumulated so far.

    Returns:
        Tuple ``(t + 1, k, notes_t, s_t, music)`` with the new notes, the
        new RNN state and the updated song.
    """
    with tf.variable_scope('generation_timestep'):
        # With several stacked cells only the top one drives the DBN biases.
        top_state = s_tm1[-1] if self.num_rnn_cells > 1 else s_tm1
        u_tm1 = top_state.c
        q_tm1 = top_state.h

        # One bias per DBN layer, computed from the previous LSTM state.
        dbn_biases = [
            tf.matmul(u_tm1, wu) + tf.matmul(q_tm1, wq) + b
            for wu, wq, b in zip(self.Wu, self.Wq, self.B)
        ]

        dbn = DBN(self.W, dbn_biases)
        # NOTE(review): 25 is presumably the number of sampling steps --
        # confirm against DBN.gen_sample.
        notes_t = dbn.gen_sample(25, x=x_tm1)
        _, s_t = self.rnn(notes_t, s_tm1)

        # Place the new notes at row offset ``t`` by padding with zeros above
        # and below, then add the padded tensor onto the song.
        # assumes notes_t has shape (1, v_size) so the result has k rows --
        # TODO confirm
        rows_before = tf.zeros([t, self.v_size])
        rows_after = tf.zeros([k - t - 1, self.v_size])
        padded_notes = tf.concat([rows_before, notes_t, rows_after], 0)
        music = music + padded_notes

        return t + 1, k, notes_t, s_t, music
def DBN_JIT(train_features, train_labels, test_features, test_labels,
            hidden_units=None, num_epochs_LR=200):
    """Train a DBN feature extractor plus a logistic-regression head.

    The DBN is trained on the training features, its output features are
    appended to the raw features (``np.hstack``), and an ``LR`` model is
    trained on the widened features with binary cross-entropy. Training and
    evaluation wall-clock times are printed.

    Args:
        train_features: Training feature matrix (needs ``.shape``).
        train_labels: Training labels; a 1-D array means a single output,
            otherwise the second dimension is the number of outputs.
        test_features: Test feature matrix.
        test_labels: Test labels, only used to build the test batches.
        hidden_units: Sizes of the DBN hidden layers. Defaults to
            ``[20, 12, 12]``.
        num_epochs_LR: Number of epochs for the logistic-regression head.

    Returns:
        The predictions returned by ``lr_model.predict`` on the test batches.
    """
    # A fresh list per call avoids sharing one mutable default between calls.
    if hidden_units is None:
        hidden_units = [20, 12, 12]

    # ------------------------------------------------------------------
    # Train the DBN (the timer also covers the LR training below).
    # ------------------------------------------------------------------
    starttime = time.time()
    dbn_model = DBN(visible_units=train_features.shape[1],
                    hidden_units=hidden_units,
                    use_gpu=False)
    dbn_model.train_static(train_features, train_labels, num_epochs=10)

    # Use the trained DBN to build additional features.
    DBN_train_features, _ = dbn_model.forward(train_features)
    DBN_test_features, _ = dbn_model.forward(test_features)
    DBN_train_features = DBN_train_features.numpy()
    DBN_test_features = DBN_test_features.numpy()

    # Raw features and DBN features side by side.
    train_features = np.hstack((train_features, DBN_train_features))
    test_features = np.hstack((test_features, DBN_test_features))

    if len(train_labels.shape) == 1:
        num_classes = 1
    else:
        num_classes = train_labels.shape[1]

    lr_model = LR(input_size=train_features.shape[1], num_classes=num_classes)
    optimizer = torch.optim.Adam(lr_model.parameters(), lr=0.00001)
    # The criterion is stateless, so build it once instead of once per batch.
    criterion = nn.BCELoss()

    batches_test = mini_batches(X=test_features, Y=test_labels)
    for _ in range(num_epochs_LR):
        # Batches are rebuilt every epoch.
        batches_train = mini_batches_update(X=train_features, Y=train_labels)
        for x_batch, y_batch in batches_train:
            x_batch = torch.tensor(x_batch).float()
            y_batch = torch.tensor(y_batch).float()

            optimizer.zero_grad()
            predict = lr_model.forward(x_batch)
            loss = criterion(predict, y_batch)
            loss.backward()
            optimizer.step()

    endtime = time.time()
    dtime = endtime - starttime
    # "%.8s" keeps the first 8 characters of the elapsed time.
    print("Train Time: %.8s s" % dtime)

    starttime = time.time()
    y_pred, _ = lr_model.predict(data=batches_test)
    endtime = time.time()
    dtime = endtime - starttime
    print("Eval Time: %.8s s" % dtime)
    return y_pred
def __init__(self, N, name=None, disp=False, noise=None):
    """Set up an experiment: parameter file, DBN, observer factory, data.

    Args:
        N: Passed as first argument to ``DataFeeder``.
        name: Experiment name. When ``None`` the first unused ``exp<i>``
            name (starting at ``exp1``) in ``EXP_DIR`` is chosen.
        disp: Forwarded to every ``Observer`` created through ``self.obs``.
        noise: Forwarded to the ``DBN`` constructor.
    """
    # Without a name, pick the first "exp<i>" that has no data file yet.
    if name is None:
        num = 1
        while path.exists(path.join(EXP_DIR, 'exp%i.data' % num)):
            num += 1
        name = 'exp%i' % num
    self.name = name

    # If the experiment has no parameter file yet, seed it from the file of
    # its alphabetic stem (name without digits/punctuation), or from
    # default.data when that stem file does not exist either.
    fname = path.join(EXP_DIR, '%s.data' % name)
    if not path.exists(fname):
        from shutil import copy
        stem = ''.join(c for c in name if c.isalpha())
        template = path.join(EXP_DIR, '%s.data' % stem)
        if path.exists(template):
            copy(template, fname)
        else:
            copy(path.join(EXP_DIR, 'default.data'), fname)

    # Load the experiment parameters (stored as JSON).
    import json
    with open(fname) as f:
        params_exp = json.load(f)
    self.fname = fname
    self.params_exp = params_exp

    # Build the experiment objects; observers are created lazily through
    # the ``self.obs`` factory and share the queue with the DBN.
    self.queue = Queue()

    def make_observer():
        return Observer(name, self.queue, disp)

    self.obs = make_observer
    self.dbn = DBN(params_exp['lay_shape'], self.queue, noise)

    # Load the model weights; ``self.exists`` stores what ``load`` returns.
    self.exists = self.dbn.load(name)

    # Load the dataset.
    from DataFeeder import DataFeeder
    self.data = DataFeeder(N, batch_s=1000)
# ---------------------------------------------------------------------------
# Hyper-parameters
# ---------------------------------------------------------------------------
device = "cpu"
EPOCHS = 2
lr = 0.5
visible_units = 828
hidden_units = [700, 550, 300]
k = 1
learning_rate_decay = False
xavier_init = False
increase_to_cd_k = False
use_gpu = False
learning_rate = 0.1
iterations = 100

# ---------------------------------------------------------------------------
# Build and train the DBN, then store its weights
# ---------------------------------------------------------------------------
# NOTE(review): ``lr`` (0.5) is what is handed to DBN; ``learning_rate``
# (0.1) is not used in this section -- confirm which one is intended.
rbm = DBN(
    visible_units,
    hidden_units,
    k,
    lr,
    learning_rate_decay,
    xavier_init,
    increase_to_cd_k,
    use_gpu,
)
# Alternative kept from the original: rbm_mnist.train(train_loader, EPOCHS, batch_size)
rbm.train_static(tens_x, tens_y, num_epochs=EPOCHS, batch_size=batch_size)
torch.save(rbm.state_dict(), f'wts/{EPOCHS}.pt')
# To reload instead of training: rbm.load_state_dict('wts/'+str(EPOCHS)+'.pt')

# ---------------------------------------------------------------------------
# 80/20 split: the first 80% of the rows stay in data_np, the rest become
# the validation set
# ---------------------------------------------------------------------------
l = data_np.shape[0]
tr = int(0.8 * l)
data_np, valid_np = data_np[:tr], data_np[tr:]

print("-----------------------------")
print(data_np.shape)
print(valid_np.shape)

train_ls = []
from DBN import DBN
from dbn_util import *

# Train a DBN on train.txt and report its accuracy on test.txt.
# The syntax below is valid on both Python 2 and Python 3: every print has a
# single argument and no code relies on map() returning a list.

# 784 is forwarded to loadDataSet unchanged.
# NOTE(review): presumably the number of input features (28 x 28) -- confirm.
d, l = loadDataSet("train.txt", 784)
testd, testl = loadDataSet("test.txt", 784)
# For a quick run, slice the sets here, e.g. d = d[0:100]; l = l[0:100].

testD = DBN([100, 100])
testD.trainDBN(d, l, batchsize=[200, 20], numepoch=[50, 200])
testD.Print()

# One prediction per test sample.
r = [testD.predict(sample) for sample in testd]

print(testl)
# The winning index is shifted by one before it is compared with the labels.
# NOTE(review): presumably the labels are 1-based -- confirm.
c = [argmax(scores) + 1 for scores in r]
print(c)

# A zero difference means the prediction matched the label.
dif = array(c) - array(testl)
print(dif)

# Float on purpose so the division below is a true division on Python 2 too.
rate = float(sum(1 for delta in dif if delta == 0))
print(rate / len(testd))
import tensorflow.examples.tutorials.mnist.input_data as input_data
import numpy as np
import tensorflow as tf
import tensorlayer as tl
from DBN import DBN

# Load MNIST with one-hot labels.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
X_train, Y_train = mnist.train.images, mnist.train.labels
X_test, Y_test = mnist.test.images, mnist.test.labels

# All DBN settings in one place; the per-RBM lists carry one entry per
# pre-trained RBM, the *_dbn entries configure the fine-tuning stage.
dbn_settings = dict(
    n_units=[784, 500, 300, 100, 10],
    # RBM pre-training
    learning_rate_rbm=[0.001, 0.001, 0.001],
    batch_size_rbm=[100, 100, 100],
    n_epoch_rbm=[10, 10, 10],
    visible_unit_type_rbm=['bin', 'bin', 'bin'],
    weight_cost_rbm=0.0001,
    momentum_rbm=0.5,
    # Fine-tuning
    learning_rate_dbn=0.001,
    batch_size_dbn=100,
    n_epoch_dbn=100,
    dropout_dbn=[1, 1, 1],
)
dbn = DBN(**dbn_settings)

# Pre-training sees the images only; fit() also uses the labels.
dbn.pretrain(X_train, X_test)
dbn.fit(X_train, Y_train, X_test, Y_test)
vis = len(data[0]) # ----------------------------------------------------------------------------- # Construct DBN # ----------------------------------------------------------------------------- pre_trained = os.path.isfile('DBN.h5') sampler = PersistentContrastiveDivergence(k=k, hidden_activations=True) optimizer = SGD(learning_rate=pretrain_lr, momentum=momentum, weight_decay=weight_decay) dbn = DBN(n_visible=vis, hidden_layer_sizes=hidden_layers, sample_copies=sample_copies, sampler=sampler, optimizer=optimizer, continuous_output=continuous_out, device=device) if pre_trained: dbn.load_model('DBN.h5') # ----------------------------------------------------------------------------- # Training # ----------------------------------------------------------------------------- if not pre_trained: dbn.pretrain(input_data=data, epochs=pretrain_epochs, batch_size=batch_size, test=test) dbn.finetune(input_data=data, lr=finetune_lr,
def gen_displayable_images():
    """Combine the three per-digit images into one image per digit.

    For every digit 0-9 the files ``<digit>_original_image.jpg``,
    ``<digit>_hidden_image.jpg`` and ``<digit>_reconstructed_image.jpg``
    under ``./images_DBN/digitwise/`` are handed to ``image_beautifier``
    together with the output path ``./images_DBN/<digit>.jpg``.
    """
    suffix = '_image.jpg'
    kinds = ['original', 'hidden', 'reconstructed']
    for n in range(10):
        digit = str(n)
        prefix = './images_DBN/digitwise/' + digit + '_'
        names = [prefix + kind + suffix for kind in kinds]
        image_beautifier(names, './images_DBN/' + digit + '.jpg')


if __name__ == '__main__':
    mnist = MNIST()
    train_x, train_y, test_x, test_y = mnist.load_dataset()

    # Rebuild the network and plug in the stored layer parameters.
    layers = [512, 128, 64, 10]
    dbn = DBN(train_x.shape[1], layers)
    dbn.layer_parameters = torch.load('mnist_trained_dbn.pt')

    for n in range(10):
        # First test sample labelled ``n``, with a leading batch axis added.
        first_idx = np.where(test_y == n)[0][0]
        x = test_x[first_idx].unsqueeze(0)

        gen_image, hidden_image = dbn.reconstructor(x)

        # Undo the input normalisation for the original and the
        # reconstruction; the hidden activations are scaled by 255 instead.
        image = mnist.inv_transform_normalizer(x.numpy())[0]
        hidden_image = (hidden_image.numpy() * 255)[0]
        gen_image = mnist.inv_transform_normalizer(gen_image.numpy())[0]

        image = np.reshape(image, (28, 28))
from DBN import DBN
from dbn_util import *
import pickle
import sys

# Usage: <script> TRAIN_FILE MODEL_FILE
# Trains a small DBN on the first 100 samples of TRAIN_FILE and pickles the
# trained model to MODEL_FILE.
if len(sys.argv) < 3:
    sys.exit("usage: %s TRAIN_FILE MODEL_FILE" % sys.argv[0])
train_file = sys.argv[1]
model_file = sys.argv[2]

d, l = loadDataSet(train_file, 784)
# Only the first 100 samples are used.
d = d[0:100]
l = l[0:100]

model = DBN([20, 10])
model.trainDBN(d, l, batchsize=[20, 10], numepoch=[10, 20])

# The context manager closes the file even if pickling fails, and avoids
# binding the name ``file``, which is a builtin on Python 2.
with open(model_file, "wb") as model_out:
    pickle.dump(model, model_out)

model.Print()
def create_unsuprvised_model(self):
    """Create the three DBNs ``self.dbn_1`` .. ``self.dbn_3``.

    DBN number *i* is built from ``self.n_units_<i>`` and ``self.iter_<i>``;
    the three are created and assigned in order.
    """
    for idx in (1, 2, 3):
        n_units = getattr(self, 'n_units_%d' % idx)
        n_iter = getattr(self, 'iter_%d' % idx)
        setattr(self, 'dbn_%d' % idx, DBN(n_units, n_iter))
# ---------------------------------------------------------------------------
# Split: the leading (1 - test_percentage) share of the rows is the training
# set, the remainder the test set. The last column is the target.
# ---------------------------------------------------------------------------
dataset_list = np.array(dataset_list)
split_at = int(len(dataset_list) * (1 - test_percentage))
trainset, testset = dataset_list[:split_at], dataset_list[split_at:]

x_train, y_train = trainset[:, :-1], trainset[:, -1:]
x_test, y_test = testset[:, :-1], testset[:, -1:]

for caption, part in (('x_train.shape:', x_train),
                      ('y_train.shape:', y_train),
                      ('x_test.shape:', x_test),
                      ('y_test.shape', y_test)):
    print(caption + str(part.shape))

# ---------------------------------------------------------------------------
# Build and train the model: pretraining uses the inputs only, fine-tuning
# uses inputs and targets.
# ---------------------------------------------------------------------------
dbn = DBN(hidden_units, input_length, output_length, device=device)
dbn.pretrain(x_train, epoch=epoch_pretrain, batch_size=batch_size)
dbn.finetune(x_train, y_train, epoch_finetune, batch_size, loss_function,
             optimizer(dbn.parameters()))

# ---------------------------------------------------------------------------
# Predict, map both series back through the scaler, and plot them together.
# ---------------------------------------------------------------------------
y_predict = dbn.predict(x_test, batch_size)
y_real = scaler.inverse_transform(y_test.reshape(-1, 1)).flatten()
y_predict = scaler.inverse_transform(y_predict.reshape(-1, 1)).flatten()

mse = mean_squared_error(y_real, y_predict)
plt.figure(1)
plt.plot(y_real, label='real')
plt.plot(y_predict, label='prediction')
plt.xlabel('MSE Error: {}'.format(mse))
plt.legend()
# Load the migration test set (original note: standard IEEE 14 test data).
# Alternative file kept from the original:
#   'E:/数据集/2020年2月nyiso数据集/2月1日数据/all_有名值_数据/x_2month1_all.csv'
with open('E:/数据集/2020年2月nyiso数据集/2月2日数据/x_2month2_all.csv', 'r', encoding="utf-8") as file:
    # Read every row and convert each field to float.
    a = [[float(x) for x in item] for item in csv.reader(file)]
data = np.array(a)

# Columns 0-53 are the normalised inputs, columns 54 and 55 the targets.
x_data_migration_test = autoNorm(data[:, 0:54]).astype(np.float32)
y_data_migration_test = data[:, [54, 55]].astype(np.float32)

# NOTE(review): the meaning of the positional DLOption arguments is not
# visible here -- check the DLOption definition before changing any value.
opts = DLOption(18, 450, 0.1, 0.2, 1000, 0, 0., 0., 0.01, 7000, 300, 0.001,
                50000)

# Pre-train the DBN, then copy it into the NN built on the same layer sizes.
dbn = DBN([40, 20, 12], opts, x_data_pretrain)
dbn.train()

nn = NN([40, 20, 12], [10, 6], opts,
        x_data_train, y_data_train,
        x_data_test, y_data_test,
        x_migration_train, y_migration_train,
        x_migration_test, y_migration_test,
        x_data_migration_test, y_data_migration_test,
        [10])
nn.load_from_dbn(dbn)
nn.train()
# print( np.mean(np.argmax(y_data_test, axis=1) == nn.predict(x_data_test)))
nn.train_migration()
nn.train_migration_all()
def run_classification(pretrain_lr=0.001,  # SdA and DBN
                       learning_rate=0.01,
                       L1_reg=0.001,
                       L2_reg=0.0001,
                       pretraining_epochs=3,  # SdA and DBN
                       n_epochs=5,
                       batch_size=64,
                       display_step=1000,
                       dataset='mnist.pkl.gz',
                       n_in=28*28,  # mnist image shape
                       input_shape=(-1, 1, 28, 28),  # CNN and LeNet5, this is MNIST dimensions
                       n_out=10,  # number of MNIST classes
                       n_hidden=1000,  # (1-layer) MLP
                       hidden_layers_sizes=[500, 500, 500],
                       CNN_filter_size=20,  # CNN
                       LeNet5_filter_sizes=[50, 20],  # LeNet5
                       corruption_levels=[0.1, 0.2, 0.3],  # SdA
                       k=1,  # DBN
                       # model_name can be the name of a model to create,
                       # or file path to load a saved file
                       model_name='LogisticRegression',
                       best_model_file_path='best_model.pkl'
                       ):
    """
    Train one of several classifiers with minibatch stochastic gradient
    descent and early stopping. This is demonstrated on MNIST.

    The list defaults are only read, never modified, by this function.

    :type pretrain_lr: float
    :param pretrain_lr: learning rate of the layer-wise pretraining
                        (SdA and DBN only)

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see
                   regularization)

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see
                   regularization)

    :type pretraining_epochs: int
    :param pretraining_epochs: pretraining epochs per layer (SdA and DBN only)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type batch_size: int
    :param batch_size: size of a minibatch

    :type display_step: int
    :param display_step: print a progress line every this many iterations

    :type dataset: string
    :param dataset: the path of the MNIST dataset file from
                 http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz

    :type corruption_levels: list of float
    :param corruption_levels: corruption level per SdA layer during
                              pretraining; must have one entry per layer

    :type k: int
    :param k: forwarded to the DBN pretraining functions

    :type model_name: string
    :param model_name: 'LogisticRegression', 'MLP', 'DeepMLP', 'CNN',
                       'LeNet5', 'SdA', 'DBN', or the path of a pickled model

    :type best_model_file_path: string
    :param best_model_file_path: where the best model so far is pickled

    :raises ValueError: if ``model_name`` is None or is a path that cannot be
                        unpickled
    """

    ######################
    # Instance Variables #
    ######################

    # random generator shared by the models that need one
    numpy_rng = np.random.RandomState(1234)

    #############
    # Load Data #
    #############

    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    val_set_x, val_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    ###################################
    # Calculate number of Minibatches #
    ###################################

    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_val_batches = val_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ############################################
    # allocate symbolic variables for the data #
    ############################################

    index = T.lscalar()  # index to a [mini]batch

    # generate symbolic variables for input (x and y represent a minibatch)
    x = T.matrix('x')   # data, presented as rasterized images
    y = T.ivector('y')  # labels, presented as 1D vector of [int] labels

    ###############
    # BUILD MODEL #
    ###############

    print('... building the model')

    model = None
    if model_name == 'LogisticRegression':
        model = LogisticRegression(
            input=x,
            n_in=n_in,
            n_out=n_out
        )
    elif model_name == 'MLP':
        model = MLP(
            numpy_rng=numpy_rng,
            input=x,
            n_in=n_in,
            n_hidden=n_hidden,
            n_out=n_out
        )
    elif model_name == 'DeepMLP':
        model = DeepMLP(
            numpy_rng=numpy_rng,
            input=x,
            n_in=n_in,
            hidden_layers_sizes=hidden_layers_sizes,
            n_out=n_out
        )
    elif model_name == 'CNN':
        model = CNN(
            numpy_rng=numpy_rng,
            input=x,
            input_shape=input_shape,
            filter_sizes=[CNN_filter_size],
            n_out=n_out,
            batch_size=batch_size
        )
    elif model_name == 'LeNet5':
        model = LeNet5(
            numpy_rng=numpy_rng,
            input=x,
            input_shape=input_shape,
            filter_sizes=LeNet5_filter_sizes,
            n_out=n_out,
            batch_size=batch_size
        )
    elif model_name == 'SdA':
        model = SdA(
            numpy_rng=numpy_rng,
            input=x,
            n_in=n_in,
            hidden_layers_sizes=hidden_layers_sizes,
            n_out=n_out
        )
    elif model_name == 'DBN':
        model = DBN(
            numpy_rng=numpy_rng,
            input=x,
            n_in=n_in,
            hidden_layers_sizes=hidden_layers_sizes,
            n_out=n_out
        )
    # Assume the model_name is a path
    elif model_name is not None:
        try:
            # Pickles are binary, so open in 'rb'; the context manager closes
            # the file on every path.
            # SECURITY: unpickling runs arbitrary code -- only load model
            # files from a trusted source.
            with open(model_name, 'rb') as model_file:
                model = pickle.load(model_file)
        except (IOError, OSError, EOFError, pickle.UnpicklingError):
            # A plain string cannot be raised; use a real exception type.
            raise ValueError("Error! Model file path not valid.")
    else:
        raise ValueError("Error! No model selected.")

    #########################################
    # PRETRAINING THE MODEL (SdA, DBN Only) #
    #########################################

    if (model_name == 'SdA') or (model_name == 'DBN'):

        print('... starting pretraining')

        #########################
        # PreTraining Functions #
        #########################

        print('... getting the pretraining functions')

        # x is passed here rather than stored in the model (the original
        # author notes that storing it in the model caused an error).
        if model_name == 'SdA':
            pretraining_fns = model.pretraining_functions(
                x=x,
                train_set_x=train_set_x,
                batch_size=batch_size)
        elif model_name == 'DBN':
            pretraining_fns = model.pretraining_functions(
                x=x,
                train_set_x=train_set_x,
                batch_size=batch_size,
                k=k)

        ##################
        # PRETRAIN MODEL #
        ##################

        print('... pre-training the model')
        start_time = timeit.default_timer()

        # The corruption_levels argument is used as given; it is no longer
        # overwritten with a hard-coded [.1, .2, .3] (identical to the
        # default, so default callers see no change).

        # Pre-train layer-wise
        for i in range(model.n_layers):
            # go through pretraining epochs
            for epoch in range(pretraining_epochs):
                # go through the training set
                cost = []
                for batch_index in range(n_train_batches):
                    if model_name == 'SdA':
                        cost.append(
                            pretraining_fns[i](index=batch_index,
                                               corruption=corruption_levels[i],
                                               lr=pretrain_lr)
                        )
                    elif model_name == 'DBN':
                        cost.append(
                            pretraining_fns[i](index=batch_index,
                                               lr=pretrain_lr)
                        )
                print('Pre-training layer %i, epoch %d, cost %f'
                      % (i, epoch + 1, np.mean(cost, dtype='float64')))

        end_time = timeit.default_timer()

        print(('The pretraining code for file ' +
               os.path.split(__file__)[1] +
               ' ran for %.2fm' % ((end_time - start_time) / 60.)),
              file=sys.stderr)
        print('...End of pre-training')

    #####################
    # Training Function #
    #####################

    cost, updates = model.get_cost_updates(
        y=y,
        L1_reg=L1_reg,
        L2_reg=L2_reg,
        learning_rate=learning_rate
    )

    # compiling a Theano function `train_model` that returns the cost, but
    # in the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=model.get_latest_cost(),
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        },
        name='train'
    )

    ##################################
    # Validation & Testing Functions #
    ##################################

    # compiling Theano functions that compute the mistakes (and, for
    # validation, the loss terms) made by the model on a minibatch
    validate_model = theano.function(
        inputs=[index],
        outputs=[model.errors(y), model.get_loss(), model.get_L1(),
                 model.get_L2_sqr()],
        givens={
            x: val_set_x[index * batch_size:(index + 1) * batch_size],
            y: val_set_y[index * batch_size:(index + 1) * batch_size]
        },
        name='validate'
    )

    test_model = theano.function(
        inputs=[index],
        outputs=model.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        },
        name='test'
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    # early-stopping parameters
    patience = 10 * n_train_batches  # look as this many examples regardless
    patience_increase = 2.  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    # go through this many minibatches before checking the network on the
    # validation set; in this case we check every epoch
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = np.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False
    minibatch_training_costs = []

    # go through training epochs
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):

            #################
            # Training Step #
            #################
            latest_minibatch_training_cost = train_model(minibatch_index)
            minibatch_training_costs.append(latest_minibatch_training_cost)

            # global minibatch counter (named so it does not shadow iter())
            iteration = (epoch - 1) * n_train_batches + minibatch_index
            if iteration % display_step == 0:
                print('training @ iter = ', iteration)

            if (iteration + 1) % validation_frequency == 0:

                #################
                # Training Loss #
                #################
                this_training_loss = np.mean(minibatch_training_costs,
                                             dtype='float64')
                print('latest average training loss: %f'
                      % (this_training_loss))
                minibatch_training_costs = []

                ###################
                # Validation Loss #
                ###################
                # Run every validation batch once and reuse its four outputs
                # (errors, loss, L1, L2_sqr) for all statistics below.
                validation_outputs = [validate_model(i)
                                      for i in range(n_val_batches)]

                this_validation_loss = np.mean(
                    [out[0] for out in validation_outputs], dtype='float64')
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                ########################
                # Validation Sublosses #
                ########################
                # unregularized loss, L1_norm, L2_norm
                this_unregularized_loss = np.mean(
                    [out[1] for out in validation_outputs], dtype='float64')
                this_L1_loss = np.mean(
                    [out[2] for out in validation_outputs], dtype='float64')
                this_L2_sqr_loss = np.mean(
                    [out[3] for out in validation_outputs], dtype='float64')
                print('latest total validation loss: %f'
                      % (this_unregularized_loss + this_L1_loss
                         + this_L2_sqr_loss))
                print('latest unregularized loss: %f'
                      % (this_unregularized_loss))
                print('latest L1_norm: %f' % (this_L1_loss))
                print('latest L2_norm: %f' % (this_L2_sqr_loss))

                ###################
                # Save Best Model #
                ###################
                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if this_validation_loss < (best_validation_loss *
                                               improvement_threshold):
                        patience = max(patience,
                                       iteration * patience_increase)

                    # Remember the new best. Without this update every
                    # validation would count as "best" and the final summary
                    # would report inf.
                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    ###################
                    # Test Best Model #
                    ###################
                    test_losses = [test_model(i)
                                   for i in range(n_test_batches)]
                    test_score = np.mean(test_losses, dtype='float64')
                    print(('     epoch %i, minibatch %i/%i, test error of best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

                    with open(best_model_file_path, 'wb') as f:
                        pickle.dump(model, f)

            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print('The code run for %d epochs, with %f epochs/sec' % (
        epoch, 1. * epoch / (end_time - start_time)))
    print(('The code for file ' +
           os.path.split(__file__)[1] +
           ' ran for %.2fm' % ((end_time - start_time) / 60.)),
          file=sys.stderr)
def test_DBN(finetune_lr=0.1, pretraining_epochs=100,
             pretrain_lr=0.01, k=1, training_epochs=1000,
             dataset='../mnist.pkl.gz', batch_size=10):
    """
    Demonstrates how to train and test a Deep Belief Network.

    This is demonstrated on MNIST.

    :type finetune_lr: float
    :param finetune_lr: learning rate used in the finetune stage
    :type pretraining_epochs: int
    :param pretraining_epochs: number of epochs to do pretraining
    :type pretrain_lr: float
    :param pretrain_lr: learning rate to be used during pre-training
    :type k: int
    :param k: number of Gibbs steps in CD/PCD
    :type training_epochs: int
    :param training_epochs: maximal number of iterations to run the optimizer
    :type dataset: string
    :param dataset: path to the pickled dataset
    :type batch_size: int
    :param batch_size: the size of a minibatch
    """

    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

    # numpy random generator
    numpy_rng = numpy.random.RandomState(123)
    print('... building the model')
    # construct the Deep Belief Network
    dbn = DBN(numpy_rng=numpy_rng, n_ins=28 * 28,
              hidden_layers_sizes=[1000, 1000, 1000],
              n_outs=10)

    # start-snippet-2
    #########################
    # PRETRAINING THE MODEL #
    #########################
    print('... getting the pretraining functions')
    pretraining_fns = dbn.pretraining_functions(train_set_x=train_set_x,
                                                batch_size=batch_size,
                                                k=k)

    print('... pre-training the model')
    start_time = timeit.default_timer()
    # Pre-train layer-wise
    for i in range(dbn.n_layers):
        # go through pretraining epochs
        for epoch in range(pretraining_epochs):
            # go through the training set
            c = []
            for batch_index in range(n_train_batches):
                c.append(pretraining_fns[i](index=batch_index,
                                            lr=pretrain_lr))
            print('Pre-training layer %i, epoch %d, cost ' % (i, epoch),
                  end=' ')
            print(numpy.mean(c, dtype='float64'))

    end_time = timeit.default_timer()
    # end-snippet-2
    print('The pretraining code for file ' + os.path.split(__file__)[1] +
          ' ran for %.2fm' % ((end_time - start_time) / 60.),
          file=sys.stderr)

    ########################
    # FINETUNING THE MODEL #
    ########################

    # get the training, validation and testing function for the model
    print('... getting the finetuning functions')
    train_fn, validate_model, test_model = dbn.build_finetune_functions(
        datasets=datasets,
        batch_size=batch_size,
        learning_rate=finetune_lr
    )

    print('... finetuning the model')
    # early-stopping parameters

    # look as this many examples regardless
    patience = 4 * n_train_batches

    # wait this much longer when a new best is found
    patience_increase = 2.

    # a relative improvement of this much is considered significant
    improvement_threshold = 0.995

    # go through this many minibatches before checking the network on
    # the validation set; in this case we check every epoch.
    # Floor division keeps the frequency an integer.
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    # Initialised so the summary below cannot hit an unbound name when no
    # validation step ever improves (e.g. training_epochs == 0).
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    done_looping = False
    epoch = 0

    while (epoch < training_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):

            train_fn(minibatch_index)
            # global minibatch counter (named so it does not shadow iter())
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:

                validation_losses = validate_model()
                this_validation_loss = numpy.mean(validation_losses,
                                                  dtype='float64')
                print('epoch %i, minibatch %i/%i, validation error %f %%' % (
                    epoch,
                    minibatch_index + 1,
                    n_train_batches,
                    this_validation_loss * 100.
                ))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if (this_validation_loss < best_validation_loss *
                            improvement_threshold):
                        patience = max(patience,
                                       iteration * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # test it on the test set
                    test_losses = test_model()
                    test_score = numpy.mean(test_losses, dtype='float64')
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print(('Optimization complete with best validation score of %f %%, '
           'obtained at iteration %i, '
           'with test performance %f %%'
           ) % (best_validation_loss * 100., best_iter + 1,
                test_score * 100.))
    print('The fine tuning code for file ' + os.path.split(__file__)[1] +
          ' ran for %.2fm' % ((end_time - start_time) / 60.),
          file=sys.stderr)
train_dataset.data = (train_dataset.train_data.type(torch.FloatTensor) / 255).bernoulli() #Lets us visualize a number from the data set idx = 5 img = train_dataset.train_data[idx] print("The number shown is the number: {}".format( train_dataset.train_labels[idx])) plt.imshow(img, cmap='gray') plt.show() # I have have set these hyper parameters although you can experiment with them to find better hyperparameters. dbn_mnist = DBN(visible_units=28 * 28, hidden_units=[23 * 23, 18 * 18], k=5, learning_rate=0.01, learning_rate_decay=True, xavier_init=True, increase_to_cd_k=False, use_gpu=False) num_epochs = 1 batch_size = 10 dbn_mnist.train_static(train_dataset.train_data, train_dataset.train_labels, num_epochs, batch_size) # visualising layer 1 learned_weights = dbn_mnist.rbm_layers[0].W.transpose(0, 1).numpy() plt.show() fig = plt.figure(3, figsize=(10, 10)) for i in range(25):
str({ 'epoch': epoch + 1, 'loss': round(running_loss, 4), 'acc': round(acc, 4) })) return model, progress if __name__ == '__main__': mnist = MNIST() train_x, train_y, test_x, test_y = mnist.load_dataset() layers = [512, 128, 64, 10] dbn = DBN(train_x.shape[1], layers, savefile='mnist_trained_dbn.pt') dbn.train_DBN(train_x) model = dbn.initialize_model() completed_model = torch.nn.Sequential(model, torch.nn.Softmax(dim=1)) torch.save(completed_model, 'mnist_trained_dbn_classifier.pt') print(completed_model) print('\n' * 3) print("Without Pre-Training") model = initialize_model() model, progress = train(model, train_x, train_y, train_x, train_y, test_x, test_y) progress = pd.DataFrame(np.array(progress))
def run_dbn_model(data_set, finetune_lr=0.1, pretraining_epochs=10,
                  pretrain_lr=0.01, k=1, training_epochs=1000,
                  batch_size=10):
    """Pre-train and fine-tune a DBN on a partitioned data set.

    The code uses only single-argument ``print(...)`` calls and floor
    division, so it behaves the same on Python 2 and Python 3.

    Args:
        data_set: Object whose ``get_data()`` returns
            ``(train, test, validation)``; each part is an ``(x, y, id)``
            triple.
        finetune_lr: Learning rate of the fine-tuning stage.
        pretraining_epochs: Pre-training epochs per layer.
        pretrain_lr: Learning rate of the pre-training stage.
        k: Forwarded to ``dbn.pretraining_functions``.
        training_epochs: Number of fine-tuning epochs. Early stopping is
            disabled in this function, so all of them are run.
        batch_size: Minibatch size.

    Returns:
        None. Progress and the final scores are printed.
    """

    def _to_shared(data_x, data_y):
        # Wrap features (floatX) and labels (int32) in Theano shared
        # variables.
        return (theano.shared(np.asarray(data_x,
                                         dtype=theano.config.floatX)),
                theano.shared(np.asarray(data_y, dtype='int32')))

    # get partitioned data sets
    train_data, test_data, validation_data = data_set.get_data()

    # each partition is (x, y, id); the ids are not used here
    train_data_x, train_data_y, train_data_id = train_data
    test_data_x, test_data_y, test_data_id = test_data
    validation_data_x, validation_data_y, validation_data_id = validation_data

    train_ten_x, train_ten_y = _to_shared(train_data_x, train_data_y)
    test_ten_x, test_ten_y = _to_shared(test_data_x, test_data_y)
    validation_ten_x, validation_ten_y = _to_shared(validation_data_x,
                                                    validation_data_y)

    # order expected by build_finetune_functions: train, validation, test
    ten_data_set = [(train_ten_x, train_ten_y),
                    (validation_ten_x, validation_ten_y),
                    (test_ten_x, test_ten_y)]

    # compute number of minibatches for training; floor division keeps this
    # an int on Python 3 as well, which range() below requires
    n_train_batches = train_ten_x.get_value(borrow=True).shape[0] // batch_size
    print("n_train_batches: " + str(n_train_batches))

    cols = train_data_x.shape[1]
    assert (train_data_x.shape[1] == test_data_x.shape[1] and
            test_data_x.shape[1] == validation_data_x.shape[1])

    print("cols: " + str(cols))
    print("train_x" + str(train_ten_x.get_value(borrow=True).shape))
    print("train_y" + str(train_ten_y.get_value(borrow=True).shape))
    print("valid_x" + str(validation_ten_x.get_value(borrow=True).shape))
    print("valid_y" + str(validation_ten_y.get_value(borrow=True).shape))
    print("test_x" + str(test_ten_x.get_value(borrow=True).shape))
    print("test_y" + str(test_ten_y.get_value(borrow=True).shape))

    # numpy random generator
    numpy_rng = np.random.RandomState(123)
    print('... building the model')
    # construct the Deep Belief Network: three hidden layers, each ten times
    # as wide as the input, and 5 outputs (hard-coded)
    dbn = DBN(numpy_rng=numpy_rng, n_ins=cols,
              hidden_layers_sizes=[cols * 10, cols * 10, cols * 10],
              n_outs=5)

    # start-snippet-2
    #########################
    # PRETRAINING THE MODEL #
    #########################
    print('... getting the pretraining functions')
    pretraining_fns = dbn.pretraining_functions(train_set_x=train_ten_x,
                                                batch_size=batch_size,
                                                k=k)

    print('... pre-training the model')
    start_time = timeit.default_timer()
    # Pre-train layer-wise
    for i in range(dbn.n_layers):
        # go through pretraining epochs
        for epoch in range(pretraining_epochs):
            # go through the training set
            c = []
            for batch_index in range(n_train_batches):
                c.append(pretraining_fns[i](index=batch_index,
                                            lr=pretrain_lr))
            # One line per epoch; the extra ' ' reproduces the separator the
            # original two chained print statements produced.
            print('Pre-training layer %i, epoch %d, cost ' % (i, epoch)
                  + ' ' + str(np.mean(c)))

    end_time = timeit.default_timer()
    # end-snippet-2
    sys.stderr.write('The pretraining code for file ' +
                     os.path.split(__file__)[1] +
                     ' ran for %.2fm' % ((end_time - start_time) / 60.) +
                     '\n')

    ########################
    # FINETUNING THE MODEL #
    ########################

    # get the training, validation and testing function for the model
    print('... getting the finetuning functions')
    train_fn, validate_model, test_model = dbn.build_finetune_functions(
        datasets=ten_data_set,
        batch_size=batch_size,
        learning_rate=finetune_lr
    )

    print('... finetuning the model')
    # early-stopping parameters
    patience = 4 * n_train_batches  # look as this many examples regardless
    patience_increase = 2.    # wait this much longer when a new best is
                              # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    # go through this many minibatches before checking the network on the
    # validation set; in this case we check every epoch
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = np.inf
    # Initialised so the summary cannot hit an unbound name when no
    # validation step ever improves.
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    done_looping = False
    epoch = 0

    while (epoch < training_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):

            train_fn(minibatch_index)
            # global minibatch counter (named so it does not shadow iter())
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:

                validation_losses = validate_model()
                this_validation_loss = np.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%'
                      % (epoch, minibatch_index + 1, n_train_batches,
                         this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if (this_validation_loss < best_validation_loss *
                            improvement_threshold):
                        patience = max(patience,
                                       iteration * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # test it on the test set
                    test_losses = test_model()
                    test_score = np.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            # Early stopping is disabled in the original; kept that way:
            # if patience <= iteration:
            #     done_looping = True
            #     break

    end_time = timeit.default_timer()
    print(('Optimization complete with best validation score of %f %%, '
           'obtained at iteration %i, '
           'with test performance %f %%')
          % (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    sys.stderr.write('The fine tuning code for file ' +
                     os.path.split(__file__)[1] +
                     ' ran for %.2fm' % ((end_time - start_time) / 60.) +
                     '\n')