import timeit

import tensorflow as tf

# HiddenLayer, RBM, DA and LogisticRegression are helper classes defined
# elsewhere in this project; note that 'n_visiable' and 'accuarcy' are the
# (misspelled) identifiers those classes actually expose.


class DBN(object):
    """
    An implementation of a deep belief network.
    The hidden layers are first pretrained as RBMs; the DBN is then treated
    as a normal MLP by adding an output layer.
    """

    def __init__(self, n_in=784, n_out=10, hidden_layers_sizes=[500, 500]):
        """
        :param n_in: int, the dimension of the input
        :param n_out: int, the dimension of the output
        :param hidden_layers_sizes: list or tuple, the hidden layer sizes
        """
        # Number of layers
        assert len(hidden_layers_sizes) > 0
        self.n_layers = len(hidden_layers_sizes)
        self.layers = []      # normal sigmoid layers
        self.rbm_layers = []  # RBM layers
        self.params = []      # keep track of params for training

        # Define the input and output
        self.x = tf.placeholder(tf.float32, shape=[None, n_in])
        self.y = tf.placeholder(tf.float32, shape=[None, n_out])

        # Construct the layers of the DBN
        for i in range(self.n_layers):
            if i == 0:
                layer_input = self.x
                input_size = n_in
            else:
                layer_input = self.layers[i - 1].output
                input_size = hidden_layers_sizes[i - 1]
            # Sigmoid layer
            sigmoid_layer = HiddenLayer(inpt=layer_input, n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=tf.nn.sigmoid)
            self.layers.append(sigmoid_layer)
            # Add the parameters for finetuning
            self.params.extend(sigmoid_layer.params)
            # Create the RBM layer, sharing weights with the sigmoid layer
            self.rbm_layers.append(RBM(inpt=layer_input, n_visiable=input_size,
                                       n_hidden=hidden_layers_sizes[i],
                                       W=sigmoid_layer.W, hbias=sigmoid_layer.b))

        # We use the LogisticRegression layer as the output layer
        self.output_layer = LogisticRegression(inpt=self.layers[-1].output,
                                               n_in=hidden_layers_sizes[-1],
                                               n_out=n_out)
        self.params.extend(self.output_layer.params)
        # The finetuning cost
        self.cost = self.output_layer.cost(self.y)
        # The accuracy
        self.accuracy = self.output_layer.accuarcy(self.y)

    def pretrain(self, sess, X_train, batch_size=50, pretraining_epochs=10,
                 lr=0.1, k=1, display_step=1):
        """
        Pretrain the layers (just train the RBM layers)
        :param sess: tf.Session
        :param X_train: the train set (you might modify this function if you
                        do not use the MNIST DataSet it is designed for)
        :param batch_size: int
        :param lr: float
        :param k: int, use CD-k
        :param pretraining_epochs: int
        :param display_step: int
        """
        print('Starting pretraining...\n')
        start_time = timeit.default_timer()
        batch_num = int(X_train.train.num_examples / batch_size)
        # Pretrain layer by layer
        for i in range(self.n_layers):
            cost = self.rbm_layers[i].get_reconstruction_cost()
            train_ops = self.rbm_layers[i].get_train_ops(learning_rate=lr, k=k,
                                                         persistent=None)
            for epoch in range(pretraining_epochs):
                avg_cost = 0.0
                for j in range(batch_num):
                    x_batch, _ = X_train.train.next_batch(batch_size)
                    # Train
                    sess.run(train_ops, feed_dict={self.x: x_batch})
                    # Accumulate the average cost
                    avg_cost += sess.run(cost, feed_dict={self.x: x_batch}) / batch_num
                # Report
                if epoch % display_step == 0:
                    print("\tPretraining layer {0} Epoch {1} cost: {2}".format(
                        i, epoch, avg_cost))
        end_time = timeit.default_timer()
        print("\nThe pretraining process ran for {0} minutes".format(
            (end_time - start_time) / 60))

    def finetuning(self, sess, trainSet, training_epochs=10, batch_size=100,
                   lr=0.1, display_step=1):
        """Finetune the network"""
        print("\nStart finetuning...\n")
        start_time = timeit.default_timer()
        train_op = tf.train.GradientDescentOptimizer(learning_rate=lr).minimize(
            self.cost, var_list=self.params)
        for epoch in range(training_epochs):
            avg_cost = 0.0
            batch_num = int(trainSet.train.num_examples / batch_size)
            for i in range(batch_num):
                x_batch, y_batch = trainSet.train.next_batch(batch_size)
                # Train
                sess.run(train_op, feed_dict={self.x: x_batch, self.y: y_batch})
                # Accumulate the average cost
                avg_cost += sess.run(self.cost, feed_dict={self.x: x_batch,
                                                           self.y: y_batch}) / batch_num
            # Report cost and validation accuracy
            if epoch % display_step == 0:
                val_acc = sess.run(self.accuracy,
                                   feed_dict={self.x: trainSet.validation.images,
                                              self.y: trainSet.validation.labels})
                print("\tEpoch {0} cost: {1}, validation accuracy: {2}".format(
                    epoch, avg_cost, val_acc))
        end_time = timeit.default_timer()
        print("\nThe finetuning process ran for {0} minutes".format(
            (end_time - start_time) / 60))
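# A minimal usage sketch for the class above, assuming the TF 1.x MNIST
# reader and that the helper classes are importable; pretrain consumes the
# whole DataSet object, as does finetuning.
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
dbn = DBN(n_in=784, n_out=10, hidden_layers_sizes=[500, 500])
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    dbn.pretrain(sess, X_train=mnist)
    dbn.finetuning(sess, trainSet=mnist)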
class DBN(object):
    """
    An implementation of a deep belief network, with tf.name_scope
    annotations so the graph and training summaries can be inspected in
    TensorBoard. The hidden layers are first pretrained as RBMs; the DBN
    is then treated as a normal MLP by adding an output layer.
    """

    def __init__(self, n_in=784, n_out=10, hidden_layers_sizes=[500, 500]):
        """
        :param n_in: int, the dimension of the input
        :param n_out: int, the dimension of the output
        :param hidden_layers_sizes: list or tuple, the hidden layer sizes
        """
        # Number of layers
        assert len(hidden_layers_sizes) > 0
        self.n_layers = len(hidden_layers_sizes)
        self.layers = []      # normal sigmoid layers
        self.rbm_layers = []  # RBM layers
        self.params = []      # keep track of params for training

        # Define the input and output
        self.x = tf.placeholder(tf.float32, shape=[None, n_in])
        self.y = tf.placeholder(tf.float32, shape=[None, n_out])

        # Construct the layers of the DBN
        with tf.name_scope('DBN_layer'):
            for i in range(self.n_layers):
                if i == 0:
                    layer_input = self.x
                    input_size = n_in
                else:
                    layer_input = self.layers[i - 1].output
                    input_size = hidden_layers_sizes[i - 1]
                # Sigmoid layer
                with tf.name_scope('internal_layer'):
                    sigmoid_layer = HiddenLayer(inpt=layer_input, n_in=input_size,
                                                n_out=hidden_layers_sizes[i],
                                                activation=tf.nn.sigmoid)
                self.layers.append(sigmoid_layer)
                # Add the parameters for finetuning
                self.params.extend(sigmoid_layer.params)
                # Create the RBM layer, sharing weights with the sigmoid layer
                with tf.name_scope('rbm_layer'):
                    self.rbm_layers.append(
                        RBM(inpt=layer_input, n_visiable=input_size,
                            n_hidden=hidden_layers_sizes[i],
                            W=sigmoid_layer.W, hbias=sigmoid_layer.b))

        # We use the LogisticRegression layer as the output layer
        with tf.name_scope('output_layer'):
            self.output_layer = LogisticRegression(
                inpt=self.layers[-1].output,
                n_in=hidden_layers_sizes[-1], n_out=n_out)
        self.params.extend(self.output_layer.params)
        # The finetuning cost
        with tf.name_scope('output_loss'):
            self.cost = self.output_layer.cost(self.y)
        # The accuracy
        self.accuracy = self.output_layer.accuarcy(self.y)

    def pretrain(self, sess, train_x, batch_size=50, pretraining_epochs=10,
                 lr=0.5, k=1, display_step=1):
        """
        Pretrain the layers (just train the RBM layers)
        :param sess: tf.Session
        :param train_x: numpy array, the inputs of the train set
        :param batch_size: int
        :param lr: float
        :param k: int, use CD-k
        :param pretraining_epochs: int
        :param display_step: int
        """
        print('Starting pretraining...\n')
        start_time = timeit.default_timer()
        # Pretrain layer by layer
        for i in range(self.n_layers):
            cost = self.rbm_layers[i].get_reconstruction_cost()
            train_ops = self.rbm_layers[i].get_train_ops(learning_rate=lr, k=k,
                                                         persistent=None)
            batch_num = int(train_x.shape[0] / batch_size)
            for epoch in range(pretraining_epochs):
                avg_cost = 0.0
                for step in range(batch_num):
                    # Slice out the current mini-batch
                    x_batch = train_x[step * batch_size:(step + 1) * batch_size]
                    # Train
                    sess.run(train_ops, feed_dict={self.x: x_batch})
                    # Accumulate the average cost
                    avg_cost += sess.run(cost, feed_dict={self.x: x_batch}) / batch_num
                # Report
                if epoch % display_step == 0:
                    print("\tPretraining layer {0} Epoch {1} cost: {2}".format(
                        i, epoch, avg_cost))
        end_time = timeit.default_timer()
        print("\nThe pretraining process ran for {0} minutes".format(
            (end_time - start_time) / 60))

    def finetuning(self, sess, train_x, train_y, test_x, test_y,
                   training_epochs=10, batch_size=100, lr=0.5, display_step=1):
        """Finetune the network"""
        print("\nStart finetuning...\n")
        start_time = timeit.default_timer()
        train_op = tf.train.GradientDescentOptimizer(
            learning_rate=lr).minimize(self.cost)
        batch_num = int(train_x.shape[0] / batch_size)
        # TensorBoard summaries (assumes the layer classes registered
        # tf.summary ops; merge_all returns None if none exist)
        merged = tf.summary.merge_all()
        writer = tf.summary.FileWriter("logs", sess.graph)
        for epoch in range(training_epochs):
            avg_cost = 0.0
            for step in range(batch_num):
                # Slice out the current mini-batch
                x_batch = train_x[step * batch_size:(step + 1) * batch_size]
                y_batch = train_y[step * batch_size:(step + 1) * batch_size]
                # Train
                sess.run(train_op, feed_dict={self.x: x_batch, self.y: y_batch})
                # Accumulate the average cost
                avg_cost += sess.run(self.cost,
                                     feed_dict={self.x: x_batch,
                                                self.y: y_batch}) / batch_num
            # Report cost and validation accuracy
            if epoch % display_step == 0:
                val_acc = sess.run(self.accuracy,
                                   feed_dict={self.x: test_x, self.y: test_y})
                print("\tEpoch {0} cost: {1}, validation accuracy: {2}".format(
                    epoch, avg_cost, val_acc))
                # Write the merged summaries for TensorBoard
                if merged is not None:
                    result = sess.run(merged,
                                      feed_dict={self.x: test_x, self.y: test_y})
                    writer.add_summary(result, epoch)
        end_time = timeit.default_timer()
        print("\nThe finetuning process ran for {0} minutes".format(
            (end_time - start_time) / 60))
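# A minimal usage sketch for this TensorBoard-enabled variant, assuming the
# same MNIST reader; unlike the class above, pretrain and finetuning here
# take plain numpy arrays rather than the DataSet object.
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
dbn = DBN(n_in=784, n_out=10, hidden_layers_sizes=[500, 500])
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    dbn.pretrain(sess, train_x=mnist.train.images)
    dbn.finetuning(sess, mnist.train.images, mnist.train.labels,
                   mnist.test.images, mnist.test.labels)
# After training, inspect the graph and summaries with:
#   tensorboard --logdir logs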
class SdA(object):
    """
    Stacked denoising autoencoder class: the model is constructed by stacking
    several dAs. The dA layers are used to initialize the network; after
    pretraining, the SdA is similar to a normal MLP.
    """

    def __init__(self, n_in=784, n_out=10, hidden_layers_sizes=(500, 500),
                 corruption_levels=(0.1, 0.1)):
        """
        :param n_in: int, the dimension of the input
        :param n_out: int, the dimension of the output
        :param hidden_layers_sizes: list or tuple, the hidden layer sizes
        :param corruption_levels: list or tuple, the corruption level for each layer
        """
        assert len(hidden_layers_sizes) >= 1
        assert len(hidden_layers_sizes) == len(corruption_levels)
        self.corruption_levels = corruption_levels
        self.n_layers = len(hidden_layers_sizes)
        # Define the layers
        self.layers = []     # the normal layers
        self.dA_layers = []  # the dA layers
        self.params = []     # params

        # Define the input and output
        self.x = tf.placeholder(tf.float32, shape=[None, n_in])
        self.y = tf.placeholder(tf.float32, shape=[None, n_out])

        # Construct the layers
        for i in range(self.n_layers):
            if i == 0:  # the input layer
                input_size = n_in
                layer_input = self.x
            else:
                input_size = hidden_layers_sizes[i - 1]
                layer_input = self.layers[i - 1].output
            # Create the sigmoid layer
            sigmoid_layer = HiddenLayer(inpt=layer_input, n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=tf.nn.sigmoid)
            self.layers.append(sigmoid_layer)
            # Create the dA layer, sharing weights with the sigmoid layer
            dA_layer = DA(inpt=layer_input, n_hidden=hidden_layers_sizes[i],
                          n_visiable=input_size, W=sigmoid_layer.W,
                          bhid=sigmoid_layer.b)
            self.dA_layers.append(dA_layer)
            # Collect the params
            self.params.extend(sigmoid_layer.params)

        # Add the output layer
        self.output_layer = LogisticRegression(inpt=self.layers[-1].output,
                                               n_in=hidden_layers_sizes[-1],
                                               n_out=n_out)
        self.params.extend(self.output_layer.params)
        # The finetuning cost
        self.finetune_cost = self.output_layer.cost(self.y)
        # The accuracy
        self.accuracy = self.output_layer.accuarcy(self.y)

    def pretrain(self, sess, X_train, pretraining_epochs=10, batch_size=100,
                 learning_rate=0.001, display_step=1):
        """
        Pretrain the layers
        :param sess: tf.Session
        :param X_train: the train set
        :param batch_size: int
        :param learning_rate: float
        """
        print('Starting pretraining...')
        start_time = timeit.default_timer()
        batch_num = int(X_train.train.num_examples / batch_size)
        # Pretrain layer by layer
        for i in range(self.n_layers):
            cost = self.dA_layers[i].get_cost(
                corruption_level=self.corruption_levels[i])
            params = self.dA_layers[i].params
            train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(
                cost, var_list=params)
            for epoch in range(pretraining_epochs):
                avg_cost = 0.0
                for j in range(batch_num):
                    x_batch, _ = X_train.train.next_batch(batch_size)
                    # Train
                    sess.run(train_op, feed_dict={self.x: x_batch})
                    # Accumulate the average cost
                    avg_cost += sess.run(cost, feed_dict={self.x: x_batch}) / batch_num
                # Report
                if epoch % display_step == 0:
                    print("Pretraining layer {0} Epoch {1} cost: {2}".format(
                        i, epoch, avg_cost))
        end_time = timeit.default_timer()
        print("The pretraining process ran for {0}m".format(
            (end_time - start_time) / 60))

    def finetuning(self, sess, trainSet, training_epochs=10, batch_size=100,
                   learning_rate=0.1, display_step=1):
        """Finetune the network"""
        print("Start finetuning...")
        start_time = timeit.default_timer()
        train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(
            self.finetune_cost, var_list=self.params)
        for epoch in range(training_epochs):
            avg_cost = 0.0
            batch_num = int(trainSet.train.num_examples / batch_size)
            for i in range(batch_num):
                x_batch, y_batch = trainSet.train.next_batch(batch_size)
                # Train
                sess.run(train_op, feed_dict={self.x: x_batch, self.y: y_batch})
                # Accumulate the average cost
                avg_cost += sess.run(self.finetune_cost,
                                     feed_dict={self.x: x_batch,
                                                self.y: y_batch}) / batch_num
            # Report cost and validation accuracy
            if epoch % display_step == 0:
                val_acc = sess.run(self.accuracy,
                                   feed_dict={self.x: trainSet.validation.images,
                                              self.y: trainSet.validation.labels})
                print("  Epoch {0} cost: {1}, validation accuracy: {2}".format(
                    epoch, avg_cost, val_acc))
        end_time = timeit.default_timer()
        print("The finetuning process ran for {0}m".format(
            (end_time - start_time) / 60))
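# A minimal usage sketch, assuming the TF 1.x MNIST reader; the SdA consumes
# the DataSet object directly, mirroring the first DBN interface above.
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
sda = SdA(n_in=784, n_out=10, hidden_layers_sizes=(500, 500),
          corruption_levels=(0.1, 0.1))
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sda.pretrain(sess, X_train=mnist)
    sda.finetuning(sess, trainSet=mnist)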
# Dropout layer
layer3_dropout = DropoutLayer(layer3_fullyconn.output, keep_prob=0.5)
# The output layer
layer4_output = LogisticRegression(layer3_dropout.output, n_in=256, n_out=10)
# Params for training
params = (layer0_conv.params + layer1_conv.params +
          layer3_fullyconn.params + layer4_output.params)
# Train dicts for dropout
train_dicts = layer3_dropout.train_dicts
# Prediction dicts for dropout
pred_dicts = layer3_dropout.pred_dicts
# Get the cost
cost = layer4_output.cost(y_)
# Accuracy
accuracy = layer4_output.accuarcy(y_)
predictor = layer4_output.y_pred
# Define the trainer
train_op = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(
    cost, var_list=params)
# Initialize all variables
init = tf.global_variables_initializer()
# Define the training parameters
training_epochs = 10
batch_size = 100
display_step = 1
# Start training
print("Start to train...")
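# A sketch of the training loop this snippet leads into, assuming x_ / y_
# placeholders and an MNIST DataSet named mnist from the surrounding script,
# and that train_dicts / pred_dicts map the dropout keep_prob placeholder to
# its training / inference values so they can be merged into the feed dict.
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(training_epochs):
        avg_cost = 0.0
        batch_num = int(mnist.train.num_examples / batch_size)
        for i in range(batch_num):
            x_batch, y_batch = mnist.train.next_batch(batch_size)
            # Merge the batch feed with the dropout train dict
            feed = {x_: x_batch, y_: y_batch}
            feed.update(train_dicts)
            sess.run(train_op, feed_dict=feed)
            avg_cost += sess.run(cost, feed_dict=feed) / batch_num
        if epoch % display_step == 0:
            # Evaluate with dropout disabled via pred_dicts
            val_feed = {x_: mnist.validation.images,
                        y_: mnist.validation.labels}
            val_feed.update(pred_dicts)
            val_acc = sess.run(accuracy, feed_dict=val_feed)
            print("Epoch {0} cost: {1}, validation accuracy: {2}".format(
                epoch, avg_cost, val_acc))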