def main(_):
    """Train a single-layer softmax classifier on MNIST and print its test accuracy.

    The positional argument is ignored. Data is read from ``FLAGS.data_dir``.
    """
    # Load the dataset with one-hot encoded labels.
    dataset = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)

    # Linear model: logits = images . weights + bias
    images = tf.placeholder(tf.float32, [None, 784])
    weights = tf.Variable(tf.zeros([784, 10]))
    bias = tf.Variable(tf.zeros([10]))
    logits = tf.matmul(images, weights) + bias

    # Ground-truth labels.
    labels = tf.placeholder(tf.float32, [None, 10])

    # The textbook cross-entropy,
    #
    #   tf.reduce_mean(-tf.reduce_sum(labels * tf.log(tf.nn.softmax(logits)),
    #                                 reduction_indices=[1]))
    #
    # can be numerically unstable, so the fused op is applied to the raw
    # logits and the per-example losses are averaged over the batch.
    per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
        labels=labels, logits=logits)
    loss = tf.reduce_mean(per_example_loss)
    train_op = tf.train.GradientDescentOptimizer(0.5).minimize(loss)

    session = tf.InteractiveSession()
    tf.global_variables_initializer().run()

    # Training: 1000 steps on mini-batches of 100 examples.
    for _step in range(1000):
        batch_images, batch_labels = dataset.train.next_batch(100)
        session.run(train_op,
                    feed_dict={images: batch_images, labels: batch_labels})

    # Evaluation on the held-out test split.
    hits = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
    accuracy = tf.reduce_mean(tf.cast(hits, tf.float32))
    test_feed = {images: dataset.test.images, labels: dataset.test.labels}
    print(session.run(accuracy, feed_dict=test_feed))
def __init__(self, CKPT_DIR):
    """Build the network, start an initialised session and load the dataset.

    Args:
        CKPT_DIR: Stored on the instance as ``self.CKPT_DIR``; not used
            further inside this constructor.
    """
    self.CKPT_DIR = CKPT_DIR

    # The network must exist before the initializer op is created so that
    # its variables are part of the initialisation.
    self.net = NetworkMnist()

    session = tf.Session()
    self.sess = session
    session.run(tf.global_variables_initializer())

    # DATA_DIR is a module-level name defined outside this method.
    self.data = input_data.read_data_sets(DATA_DIR, one_hot=True)
def train2(layers, learning_rate=0.005, minibatch_size=100, iterations=2000 + 1):
    """Train a batch-normalised 5-layer sigmoid MLP on MNIST and save its learning curves.

    The architecture is fixed at 784-200-100-60-30-10. Batch normalisation is
    applied before every sigmoid, and Adam is driven by an exponentially
    decaying learning rate (0.003 -> 0.0001).

    Args:
        layers: Unused by this implementation; kept so existing callers keep
            working.
        learning_rate: Unused; the decaying schedule defined below is what
            the optimizer actually receives.
        minibatch_size: Number of training examples per step. The mean loss
            is also multiplied by this value.
        iterations: Number of training steps to run.

    Side effects:
        Prints training/test metrics and writes a PNG with the accuracy and
        cost curves into the ``output`` directory (created if missing).
    """
    import os  # local import: only needed to make sure the output dir exists

    # Use local mnist dataset (60k for train, 10k for test)
    mnist = input_data.read_data_sets("mnist", one_hot=True, reshape=False)

    # input X: 28x28 grayscale images, the first dimension (None) indexes the
    # images in the mini-batch
    X = tf.placeholder(tf.float32, shape=[None, 28, 28, 1], name="x_placeholder")
    # correct answers go here
    Y_ = tf.placeholder(tf.float32, shape=[None, 10], name="y_placeholder")
    # current step, drives the learning-rate decay and the moving averages
    # (named `step` rather than `iter` so the builtin is not shadowed)
    step = tf.placeholder(tf.int32)
    # train/test selector for batch normalisation
    tst = tf.placeholder(tf.bool)

    # five layers and their number of neurons (the last layer has 10 softmax
    # neurons)
    L = 200
    M = 100
    N = 60
    P = 30
    Q = 10

    # Weights, plus per-layer batch-norm scale (S) and offset (O).
    W1 = tf.Variable(tf.truncated_normal([784, L], stddev=0.1))  # 784 = 28 * 28
    S1 = tf.Variable(tf.ones([L]))
    O1 = tf.Variable(tf.zeros([L]))
    W2 = tf.Variable(tf.truncated_normal([L, M], stddev=0.1))
    S2 = tf.Variable(tf.ones([M]))
    O2 = tf.Variable(tf.zeros([M]))
    W3 = tf.Variable(tf.truncated_normal([M, N], stddev=0.1))
    S3 = tf.Variable(tf.ones([N]))
    O3 = tf.Variable(tf.zeros([N]))
    W4 = tf.Variable(tf.truncated_normal([N, P], stddev=0.1))
    S4 = tf.Variable(tf.ones([P]))
    O4 = tf.Variable(tf.zeros([P]))
    W5 = tf.Variable(tf.truncated_normal([P, Q], stddev=0.1))
    B5 = tf.Variable(tf.zeros([Q]))

    def batchnorm(Ylogits, Offset, Scale, is_test, iteration):
        """Return (normalised logits, op that updates the moving averages)."""
        # adding the iteration prevents averaging across non-existing
        # iterations
        exp_moving_avg = tf.train.ExponentialMovingAverage(0.998, iteration)
        bnepsilon = 1e-5
        mean, variance = tf.nn.moments(Ylogits, [0])
        update_moving_averages = exp_moving_avg.apply([mean, variance])
        # Batch statistics while training, moving averages at test time.
        m = tf.cond(is_test, lambda: exp_moving_avg.average(mean), lambda: mean)
        v = tf.cond(is_test, lambda: exp_moving_avg.average(variance),
                    lambda: variance)
        Ybn = tf.nn.batch_normalization(Ylogits, m, v, Offset, Scale, bnepsilon)
        return Ybn, update_moving_averages

    def no_batchnorm(Ylogits, Offset, Scale, is_test, iteration):
        """Drop-in replacement for `batchnorm` that leaves the logits untouched."""
        return Ylogits, tf.no_op()

    # The model
    XX = tf.reshape(X, [-1, 784])

    Y1l = tf.matmul(XX, W1)
    Y1bn, update_ema1 = batchnorm(Y1l, O1, S1, tst, step)
    Y1 = tf.nn.sigmoid(Y1bn)

    Y2l = tf.matmul(Y1, W2)
    Y2bn, update_ema2 = batchnorm(Y2l, O2, S2, tst, step)
    Y2 = tf.nn.sigmoid(Y2bn)

    Y3l = tf.matmul(Y2, W3)
    Y3bn, update_ema3 = batchnorm(Y3l, O3, S3, tst, step)
    Y3 = tf.nn.sigmoid(Y3bn)

    Y4l = tf.matmul(Y3, W4)
    Y4bn, update_ema4 = batchnorm(Y4l, O4, S4, tst, step)
    Y4 = tf.nn.sigmoid(Y4bn)

    Ylogits = tf.matmul(Y4, W5) + B5
    Y = tf.nn.softmax(Ylogits)

    update_ema = tf.group(update_ema1, update_ema2, update_ema3, update_ema4)

    # Loss is defined as cross entropy between the prediction and the real value
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
        logits=Ylogits, labels=Y_, name="lossFunction")
    cross_entropy = tf.reduce_mean(cross_entropy) * minibatch_size

    # accuracy of the trained model, between 0 (worst) and 1 (best)
    correct_prediction = tf.equal(tf.argmax(Y, 1), tf.argmax(Y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32),
                              name="accuracy")

    # training step
    # the learning rate is 0.0001 + 0.003 * (1/e)^(step/2000),
    # i.e. exponential decay from 0.003 -> 0.0001
    lr = 0.0001 + tf.train.exponential_decay(0.003, step, 2000, 1 / math.e)
    train_step = tf.train.AdamOptimizer(lr).minimize(cross_entropy)

    init = tf.global_variables_initializer()

    # recorded metrics
    train_indexes = []
    train_costs = []
    train_accuracies = []
    test_indexes = []
    test_costs = []
    test_accuracies = []

    sess = tf.Session()
    try:
        sess.run(init)

        # Feed the next batch and run the training
        for i in range(iterations):
            batch_X, batch_Y = mnist.train.next_batch(minibatch_size)

            # compute training values
            if i % 10 == 0:
                acc, cost = sess.run(
                    [accuracy, cross_entropy],
                    feed_dict={X: batch_X, Y_: batch_Y, step: i, tst: False})
                train_indexes.append(i)
                train_costs.append(cost)
                train_accuracies.append(acc)
                print(str(i) + ": accuracy:" + str(acc) + " loss: " + str(cost))

            # compute test values
            if i % 50 == 0:
                acc, cost = sess.run(
                    [accuracy, cross_entropy],
                    feed_dict={X: mnist.test.images, Y_: mnist.test.labels,
                               tst: True})
                test_indexes.append(i)
                test_costs.append(cost)
                test_accuracies.append(acc)
                print(str(i) + ": ********* epoch " + str(
                    i * minibatch_size // mnist.train.images.shape[0] + 1) +
                      " ********* test accuracy:" + str(acc) +
                      " test loss: " + str(cost))

            # the backpropagation training step
            sess.run([train_step, update_ema],
                     feed_dict={X: batch_X, Y_: batch_Y, step: i, tst: False})

        print("max test accuracy: " + str(max(test_accuracies)))

        # The schedule depends only on `step`, so nothing else needs feeding.
        lrate = sess.run(lr, feed_dict={step: iterations - 1})
    finally:
        # Release the session even if training is interrupted by an error.
        sess.close()

    # plot train and test costs and accuracies
    plt.figure(figsize=(12, 4))
    plt.subplot(121)
    plt.plot(np.squeeze(train_indexes), np.squeeze(train_accuracies),
             label="train_accuracy")
    plt.plot(np.squeeze(test_indexes), np.squeeze(test_accuracies),
             label="test_accuracy")
    plt.legend()
    plt.ylabel('accuracy')
    plt.xlabel('iterations')
    plt.title("Learning rate =" + str(lrate))

    plt.subplot(122)
    plt.plot(np.squeeze(train_indexes), np.squeeze(train_costs),
             label="train_costs")
    plt.plot(np.squeeze(test_indexes), np.squeeze(test_costs),
             label="test_costs")
    plt.legend()
    plt.ylabel('costs')
    plt.xlabel('iterations')
    plt.title("Learning rate =" + str(lrate))

    # savefig does not create missing directories.
    if not os.path.exists("output"):
        os.makedirs("output")
    plt.savefig(
        "output/learning_rate " + str(lrate) + " iterations " +
        str(iterations) + " bn max test accuracy " +
        str(max(test_accuracies)) + " .png")
def train4(minibatch_size=100, iterations=2000 + 1):
    """Train a 3-conv + 1-dense network on MNIST and save its learning curves.

    Architecture: conv 5x5 (6 channels, stride 1) -> conv 5x5 (12, stride 2)
    -> conv 4x4 (24, stride 2) -> dense 200 -> 10 logits, all with ReLU.
    Adam is driven by an exponentially decaying learning rate
    (0.003 -> 0.0001).

    Args:
        minibatch_size: Number of training examples per step. The mean loss
            is also multiplied by this value.
        iterations: Number of training steps to run.

    Side effects:
        Prints training/test metrics and writes a PNG with the accuracy and
        cost curves into the ``output`` directory (created if missing).
    """
    import os  # local import: only needed to make sure the output dir exists

    # Use local mnist dataset (60k for train, 10k for test)
    mnist = input_data.read_data_sets("mnist", one_hot=True, reshape=False)

    # input X: 28x28 grayscale images, the first dimension (None) indexes the
    # images in the mini-batch
    X = tf.placeholder(tf.float32, shape=[None, 28, 28, 1], name="x_placeholder")
    # correct answers go here
    Y_ = tf.placeholder(tf.float32, shape=[None, 10], name="y_placeholder")
    # step for variable learning rate
    step = tf.placeholder(tf.int32)

    # three convolutional layers with their channel counts, and a
    # fully connected layer (the last layer has 10 softmax neurons)
    K = 6    # first convolutional layer output depth
    L = 12   # second convolutional layer output depth
    M = 24   # third convolutional layer output depth
    N = 200  # fully connected layer

    # 5x5 patch, 1 input channel, K output channels
    W1 = tf.Variable(tf.truncated_normal([5, 5, 1, K], stddev=0.1))
    B1 = tf.Variable(tf.ones([K]) / 10)
    W2 = tf.Variable(tf.truncated_normal([5, 5, K, L], stddev=0.1))
    B2 = tf.Variable(tf.ones([L]) / 10)
    W3 = tf.Variable(tf.truncated_normal([4, 4, L, M], stddev=0.1))
    B3 = tf.Variable(tf.ones([M]) / 10)

    W4 = tf.Variable(tf.truncated_normal([7 * 7 * M, N], stddev=0.1))
    B4 = tf.Variable(tf.ones([N]) / 10)
    W5 = tf.Variable(tf.truncated_normal([N, 10], stddev=0.1))
    B5 = tf.Variable(tf.ones([10]) / 10)

    # The model
    # stride 1: output is 28x28
    Y1 = tf.nn.relu(
        tf.nn.conv2d(X, W1, strides=[1, 1, 1, 1], padding='SAME') + B1)
    # stride 2: output is 14x14
    Y2 = tf.nn.relu(
        tf.nn.conv2d(Y1, W2, strides=[1, 2, 2, 1], padding='SAME') + B2)
    # stride 2: output is 7x7
    Y3 = tf.nn.relu(
        tf.nn.conv2d(Y2, W3, strides=[1, 2, 2, 1], padding='SAME') + B3)

    # reshape the output from the third convolution for the fully connected
    # layer
    YY = tf.reshape(Y3, shape=[-1, 7 * 7 * M])

    Y4 = tf.nn.relu(tf.matmul(YY, W4) + B4)
    Ylogits = tf.matmul(Y4, W5) + B5
    Y = tf.nn.softmax(Ylogits)

    # Loss is defined as cross entropy between the prediction and the real value
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
        logits=Ylogits, labels=Y_, name="lossFunction")
    cross_entropy = tf.reduce_mean(cross_entropy) * minibatch_size

    # accuracy of the trained model, between 0 (worst) and 1 (best)
    correct_prediction = tf.equal(tf.argmax(Y, 1), tf.argmax(Y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32),
                              name="accuracy")

    # training step
    # the learning rate is 0.0001 + 0.003 * (1/e)^(step/2000),
    # i.e. exponential decay from 0.003 -> 0.0001
    lr = 0.0001 + tf.train.exponential_decay(0.003, step, 2000, 1 / math.e)
    train_step = tf.train.AdamOptimizer(lr).minimize(cross_entropy)

    init = tf.global_variables_initializer()

    # recorded metrics
    train_indexes = []
    train_costs = []
    train_accuracies = []
    test_indexes = []
    test_costs = []
    test_accuracies = []

    sess = tf.Session()
    try:
        sess.run(init)

        # Feed the next batch and run the training
        for i in range(iterations):
            batch_X, batch_Y = mnist.train.next_batch(minibatch_size)

            # compute training values
            if i % 10 == 0:
                acc, cost = sess.run(
                    [accuracy, cross_entropy],
                    feed_dict={X: batch_X, Y_: batch_Y, step: i})
                train_indexes.append(i)
                train_costs.append(cost)
                train_accuracies.append(acc)
                print(str(i) + ": accuracy:" + str(acc) + " loss: " + str(cost))

            # compute test values
            if i % 50 == 0:
                acc, cost = sess.run(
                    [accuracy, cross_entropy],
                    feed_dict={X: mnist.test.images, Y_: mnist.test.labels})
                test_indexes.append(i)
                test_costs.append(cost)
                test_accuracies.append(acc)
                print(
                    str(i) + ": ********* epoch " +
                    str(i * minibatch_size // mnist.train.images.shape[0] + 1) +
                    " ********* test accuracy:" + str(acc) + " test loss: " +
                    str(cost))

            # the backpropagation training step
            sess.run(train_step, feed_dict={X: batch_X, Y_: batch_Y, step: i})

        print("max test accuracy: " + str(max(test_accuracies)))

        # The schedule depends only on `step`, so nothing else needs feeding.
        lrate = sess.run(lr, feed_dict={step: iterations - 1})
    finally:
        # Release the session even if training is interrupted by an error.
        sess.close()

    # plot train and test costs and accuracies
    plt.figure(figsize=(12, 4))
    plt.subplot(121)
    plt.plot(np.squeeze(train_indexes), np.squeeze(train_accuracies),
             label="train_accuracy")
    plt.plot(np.squeeze(test_indexes), np.squeeze(test_accuracies),
             label="test_accuracy")
    plt.legend()
    plt.ylabel('accuracy')
    plt.xlabel('iterations')
    plt.title("Learning rate =" + str(lrate))

    plt.subplot(122)
    plt.plot(np.squeeze(train_indexes), np.squeeze(train_costs),
             label="train_costs")
    plt.plot(np.squeeze(test_indexes), np.squeeze(test_costs),
             label="test_costs")
    plt.legend()
    plt.ylabel('costs')
    plt.xlabel('iterations')
    plt.title("Learning rate =" + str(lrate))

    outfig = "output/learning_rate " + str(lrate) + " iterations " + str(
        iterations) + " max test accuracy " + str(
            max(test_accuracies)) + " .png"
    # savefig does not create missing directories.
    if not os.path.exists("output"):
        os.makedirs("output")
    plt.savefig(outfig)
    print("out figure's name: ", outfig)
def train4(minibatch_size=100, iterations=2000 + 1):
    """Train a conv net with batch norm and dropout on MNIST and save its learning curves.

    Architecture: conv 5x5 (6 channels, stride 1) -> conv 4x4 (12, stride 2)
    -> conv 4x4 (24, stride 2) -> dense 200 -> 10 logits. Every hidden layer
    is batch-normalised, passed through ReLU and then dropout. Adam is driven
    by an exponentially decaying learning rate (0.003 -> 0.0001).

    Args:
        minibatch_size: Number of training examples per step. The mean loss
            is also multiplied by this value.
        iterations: Number of training steps to run.

    Side effects:
        Prints training/test metrics and writes a PNG with the accuracy and
        cost curves into the ``output`` directory (created if missing).
    """
    import os  # local import: only needed to make sure the output dir exists

    # Use local mnist dataset (60k for train, 10k for test)
    mnist = input_data.read_data_sets("mnist", one_hot=True, reshape=False)

    # input X: 28x28 grayscale images, the first dimension (None) indexes the
    # images in the mini-batch
    X = tf.placeholder(tf.float32, shape=[None, 28, 28, 1], name="x_placeholder")
    # correct answers go here
    Y_ = tf.placeholder(tf.float32, shape=[None, 10], name="y_placeholder")
    # current step, drives the learning-rate decay and the moving averages
    # (named `step` rather than `iter` so the builtin is not shadowed)
    step = tf.placeholder(tf.int32)
    # Probability of keeping a node during dropout: 1.0 at test time
    # (no dropout) and 0.75 at training time
    pkeep = tf.placeholder(tf.float32)
    pkeep_conv = tf.placeholder(tf.float32)
    # test flag for batch norm
    tst = tf.placeholder(tf.bool)

    def batchnorm(Ylogits, is_test, iteration, offset, convolutional=False):
        """Return (normalised logits, op that updates the moving averages)."""
        # adding the iteration prevents averaging across non-existing
        # iterations
        exp_moving_avg = tf.train.ExponentialMovingAverage(0.999, iteration)
        bnepsilon = 1e-5
        # Convolutional outputs are normalised per channel, i.e. over batch,
        # height and width.
        axes = [0, 1, 2] if convolutional else [0]
        mean, variance = tf.nn.moments(Ylogits, axes)
        update_moving_averages = exp_moving_avg.apply([mean, variance])
        # Batch statistics while training, moving averages at test time.
        m = tf.cond(is_test, lambda: exp_moving_avg.average(mean), lambda: mean)
        v = tf.cond(is_test, lambda: exp_moving_avg.average(variance),
                    lambda: variance)
        Ybn = tf.nn.batch_normalization(Ylogits, m, v, offset, None, bnepsilon)
        return Ybn, update_moving_averages

    def no_batchnorm(Ylogits, is_test, iteration, offset, convolutional=False):
        """Drop-in replacement for `batchnorm` that leaves the logits untouched."""
        return Ylogits, tf.no_op()

    def compatible_convolutional_noise_shape(Y):
        """Return a [batch, 1, 1, channels] shape tensor derived from Y's shape.

        Not used by the model below; it can be passed as the noise shape of
        tf.nn.dropout on a convolutional output.
        """
        noiseshape = tf.shape(Y)
        noiseshape = noiseshape * tf.constant([1, 0, 0, 1]) + tf.constant(
            [0, 1, 1, 0])
        return noiseshape

    # three convolutional layers with their channel counts, and a
    # fully connected layer (the last layer has 10 softmax neurons)
    K = 6    # first convolutional layer output depth
    L = 12   # second convolutional layer output depth
    M = 24   # third convolutional layer output depth
    N = 200  # fully connected layer

    # 5x5 patch, 1 input channel, K output channels
    W1 = tf.Variable(tf.truncated_normal([5, 5, 1, K], stddev=0.1))
    B1 = tf.Variable(tf.ones([K]) / 10)
    W2 = tf.Variable(tf.truncated_normal([4, 4, K, L], stddev=0.1))
    B2 = tf.Variable(tf.ones([L]) / 10)
    W3 = tf.Variable(tf.truncated_normal([4, 4, L, M], stddev=0.1))
    B3 = tf.Variable(tf.ones([M]) / 10)

    W4 = tf.Variable(tf.truncated_normal([7 * 7 * M, N], stddev=0.1))
    B4 = tf.Variable(tf.ones([N]) / 10)
    W5 = tf.Variable(tf.truncated_normal([N, 10], stddev=0.1))
    B5 = tf.Variable(tf.ones([10]) / 10)

    # The model
    # stride 1: output is 28x28
    Y1l = tf.nn.conv2d(X, W1, strides=[1, 1, 1, 1], padding='SAME')
    Y1bn, update_ema1 = batchnorm(Y1l, tst, step, B1, convolutional=True)
    Y1r = tf.nn.relu(Y1bn)
    Y1 = tf.nn.dropout(Y1r, pkeep_conv)
    # stride 2: output is 14x14
    Y2l = tf.nn.conv2d(Y1, W2, strides=[1, 2, 2, 1], padding='SAME')
    Y2bn, update_ema2 = batchnorm(Y2l, tst, step, B2, convolutional=True)
    Y2r = tf.nn.relu(Y2bn)
    Y2 = tf.nn.dropout(Y2r, pkeep_conv)
    # stride 2: output is 7x7
    Y3l = tf.nn.conv2d(Y2, W3, strides=[1, 2, 2, 1], padding='SAME')
    Y3bn, update_ema3 = batchnorm(Y3l, tst, step, B3, convolutional=True)
    Y3r = tf.nn.relu(Y3bn)
    Y3 = tf.nn.dropout(Y3r, pkeep_conv)

    # reshape the output from the third convolution for the fully connected
    # layer
    YY = tf.reshape(Y3, shape=[-1, 7 * 7 * M])

    Y4l = tf.matmul(YY, W4)
    Y4bn, update_ema4 = batchnorm(Y4l, tst, step, B4)
    Y4r = tf.nn.relu(Y4bn)
    Y4 = tf.nn.dropout(Y4r, pkeep)
    Ylogits = tf.matmul(Y4, W5) + B5
    Y = tf.nn.softmax(Ylogits)

    update_ema = tf.group(update_ema1, update_ema2, update_ema3, update_ema4)

    # Loss is defined as cross entropy between the prediction and the real value
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
        logits=Ylogits, labels=Y_, name="lossFunction")
    cross_entropy = tf.reduce_mean(cross_entropy) * minibatch_size

    # accuracy of the trained model, between 0 (worst) and 1 (best)
    correct_prediction = tf.equal(tf.argmax(Y, 1), tf.argmax(Y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32),
                              name="accuracy")

    # training step
    # the learning rate is 0.0001 + 0.003 * (1/e)^(step/2000),
    # i.e. exponential decay from 0.003 -> 0.0001
    lr = 0.0001 + tf.train.exponential_decay(0.003, step, 2000, 1 / math.e)
    train_step = tf.train.AdamOptimizer(lr).minimize(cross_entropy)

    init = tf.global_variables_initializer()

    # recorded metrics
    train_indexes = []
    train_costs = []
    train_accuracies = []
    test_indexes = []
    test_costs = []
    test_accuracies = []

    sess = tf.Session()
    try:
        sess.run(init)

        # Feed the next batch and run the training
        for i in range(iterations):
            batch_X, batch_Y = mnist.train.next_batch(minibatch_size)

            # compute training values (dropout disabled while measuring)
            if i % 10 == 0:
                acc, cost = sess.run(
                    [accuracy, cross_entropy],
                    feed_dict={
                        X: batch_X,
                        Y_: batch_Y,
                        step: i,
                        tst: False,
                        pkeep: 1.0,
                        pkeep_conv: 1.0
                    })
                train_indexes.append(i)
                train_costs.append(cost)
                train_accuracies.append(acc)
                print(str(i) + ": accuracy:" + str(acc) + " loss: " + str(cost))

            # compute test values
            if i % 50 == 0:
                acc, cost = sess.run(
                    [accuracy, cross_entropy],
                    feed_dict={
                        X: mnist.test.images,
                        Y_: mnist.test.labels,
                        tst: True,
                        pkeep: 1.0,
                        pkeep_conv: 1.0
                    })
                test_indexes.append(i)
                test_costs.append(cost)
                test_accuracies.append(acc)
                print(
                    str(i) + ": ********* epoch " +
                    str(i * minibatch_size // mnist.train.images.shape[0] + 1) +
                    " ********* test accuracy:" + str(acc) + " test loss: " +
                    str(cost))

            # the backpropagation training step (dropout on the dense layer)
            sess.run(
                train_step, {
                    X: batch_X,
                    Y_: batch_Y,
                    tst: False,
                    step: i,
                    pkeep: 0.75,
                    pkeep_conv: 1.0
                })
            # moving averages are updated in a separate pass without dropout
            sess.run(
                update_ema, {
                    X: batch_X,
                    Y_: batch_Y,
                    tst: False,
                    step: i,
                    pkeep: 1.0,
                    pkeep_conv: 1.0
                })

        print("max test accuracy: " + str(max(test_accuracies)))

        # The schedule depends only on `step`, so nothing else needs feeding.
        lrate = sess.run(lr, feed_dict={step: iterations - 1})
    finally:
        # Release the session even if training is interrupted by an error.
        sess.close()

    # plot train and test costs and accuracies
    plt.figure(figsize=(12, 4))
    plt.subplot(121)
    plt.plot(np.squeeze(train_indexes), np.squeeze(train_accuracies),
             label="train_accuracy")
    plt.plot(np.squeeze(test_indexes), np.squeeze(test_accuracies),
             label="test_accuracy")
    plt.legend()
    plt.ylabel('accuracy')
    plt.xlabel('iterations')
    plt.title("Learning rate =" + str(lrate))

    plt.subplot(122)
    plt.plot(np.squeeze(train_indexes), np.squeeze(train_costs),
             label="train_costs")
    plt.plot(np.squeeze(test_indexes), np.squeeze(test_costs),
             label="test_costs")
    plt.legend()
    plt.ylabel('costs')
    plt.xlabel('iterations')
    plt.title("Learning rate =" + str(lrate))

    outfig = "output/learning_rate " + str(lrate) + " iterations " + str(
        iterations) + " bn dropout max test accuracy " + str(
            max(test_accuracies)) + " .png"
    # savefig does not create missing directories.
    if not os.path.exists("output"):
        os.makedirs("output")
    plt.savefig(outfig)
    print("out figure's name: ", outfig)
import shutil

from image_recognition.utils import ensure_dir_exists
from mnist import input_data
import tensorflow as tf

# Log directory for this model; presumably created by ensure_dir_exists if it
# is missing (helper not visible here).
tmp_filename = "/tmp/mnist_simple_model_logs"
ensure_dir_exists(tmp_filename)

mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# Inputs: flattened 28x28 images and one-hot labels.
x = tf.placeholder(tf.float32, [None, 784])
y_ = tf.placeholder(tf.float32, [None, 10])

# Parameters of the single softmax layer.
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))

with tf.name_scope("Wx_b") as scope:
    y = tf.nn.softmax(tf.matmul(x, W) + b)

with tf.name_scope("xent") as scope:
    # Clip before the log: a softmax output of exactly 0 would otherwise
    # yield -inf and turn the loss (and its gradients) into NaN.
    cross_entropy = -tf.reduce_sum(y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0)))
    # NOTE(review): tf.scalar_summary is the pre-1.0 summary API; later
    # releases spell it tf.summary.scalar. Left as-is to match the
    # TensorFlow version this script targets.
    ce_summ = tf.scalar_summary("cross entropy", cross_entropy)

with tf.name_scope("train") as scope:
    train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)

with tf.name_scope("test") as scope:
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    accuracy_summary = tf.scalar_summary("accuracy", accuracy)
import tensorflow as tf
from mnist import input_data
import matplotlib.pyplot as plt
import numpy as np

# Labels must be one-hot so they line up with the 10 softmax outputs; with
# integer labels the loss below would broadcast the class index against
# log(y) instead of selecting the true class.
mnist = input_data.read_data_sets("./mnist", one_hot=True)

X_train = tf.placeholder(tf.float32, [None, 784])
y_train = tf.placeholder(tf.float32, [None, 10])

W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))
y = tf.nn.softmax(tf.matmul(X_train, W) + b)

# Clip before the log so a zero probability cannot produce -inf / NaN.
cross_entropy = -tf.reduce_sum(
    y_train * tf.log(tf.clip_by_value(y, 1e-10, 1.0)))
train = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    for i in range(1000):
        batch_xs, batch_ys = mnist.train.next_batch(100)
        batch_xs = np.array(batch_xs).reshape(-1, 784)
        batch_ys = np.array(batch_ys).reshape(-1, 10)
        print(batch_xs.shape, batch_ys.shape)

        feed = {X_train: batch_xs, y_train: batch_ys}
        sess.run(train, feed_dict=feed)
        if i % 50 == 0:
            # The loss depends on both placeholders, so it has to be
            # evaluated with the same feed as the training step.
            print(sess.run(cross_entropy, feed_dict=feed))
import mnist.input_data as input_data
import tensorflow as tf

Mnist = input_data.read_data_sets("data/", one_hot=True)

# Single softmax layer: y = softmax(x . W + b)
x = tf.placeholder("float", [None, 784])
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))
y = tf.nn.softmax(tf.matmul(x, W) + b)

# One-hot ground-truth labels.
y_ = tf.placeholder("float", [None, 10])

# Clip before the log: a softmax output of exactly 0 would otherwise yield
# -inf and turn the loss (and its gradients) into NaN.
cross_entropy = -tf.reduce_sum(y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0)))
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)

# NOTE(review): tf.initialize_all_variables is the older spelling of
# tf.global_variables_initializer; kept to match the TensorFlow version this
# script targets.
init = tf.initialize_all_variables()

sess = tf.Session()
sess.run(init)

# Train for 1000 steps on mini-batches of 100 examples.
for i in range(1000):
    batch_xs, batch_ys = Mnist.train.next_batch(100)
    sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

# Fraction of test images whose most likely class matches the label.
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
acccuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
print(sess.run(acccuracy,
               feed_dict={x: Mnist.test.images, y_: Mnist.test.labels}))
#-*- coding:utf-8 -*-
# Import TensorFlow
import tensorflow as tf
# Import the MNIST tutorial module
from mnist import input_data

# Read the MNIST data, with one-hot encoded labels
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# x is a placeholder standing for the images to be recognised
x = tf.placeholder(tf.float32, [None, 784])

# W is a parameter of the softmax model; it maps a 784-dimensional input to
# a 10-dimensional output. Model parameters are held in tf.Variable objects.
W = tf.Variable(tf.zeros([784, 10]))
# b is the other parameter of the softmax model, usually called the bias
b = tf.Variable(tf.zeros([10]))

# y is the output of the model
y = tf.nn.softmax(tf.matmul(x, W) + b)
# y_ holds the true image labels, also as a placeholder
y_ = tf.placeholder(tf.float32, [None, 10])

# At this point we have the two important tensors: y (the model output) and
# y_ (the true labels, one-hot encoded).

# Cross-entropy loss built from y and y_. The probabilities are clipped
# before the log so that a softmax output of exactly 0 cannot produce
# -inf / NaN. reduce_sum without an axis already returns a scalar, so the
# outer reduce_mean leaves the value unchanged.
cross_entropy = tf.reduce_mean(
    -tf.reduce_sum(y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0))))

# With the loss defined, gradient descent optimises the parameters (W and b)
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)
def train2(layers, learning_rate=0.005, minibatch_size=100, iterations=2000 + 1):
    """Train a fully connected sigmoid network on MNIST and save its learning curves.

    Args:
        layers: Layer sizes including input and output, e.g.
            ``[784, 200, 100, 10]``. The input is reshaped to 784 features
            and the labels have 10 classes, so the first entry must be 784
            and the last 10 for the graph to build.
        learning_rate: Only used to label the plots and the output file
            name. The optimizer itself uses the decaying schedule defined
            below (0.003 -> 0.0001).
        minibatch_size: Number of training examples per step. The mean loss
            is also multiplied by this value.
        iterations: Number of training steps to run.

    Raises:
        ValueError: If ``layers`` has fewer than two entries.

    Side effects:
        Prints training/test metrics and writes a PNG with the accuracy and
        cost curves into the ``output`` directory (created if missing).
    """
    import os  # local import: only needed to make sure the output dir exists

    if len(layers) < 2:
        raise ValueError(
            "layers must contain at least an input and an output size")

    # Use local mnist dataset (60k for train, 10k for test)
    mnist = input_data.read_data_sets("mnist", one_hot=True, reshape=False)

    # input X: 28x28 grayscale images, the first dimension (None) indexes the
    # images in the mini-batch
    X = tf.placeholder(tf.float32, shape=[None, 28, 28, 1], name="x_placeholder")
    # correct answers go here
    Y_ = tf.placeholder(tf.float32, shape=[None, 10], name="y_placeholder")
    # step for variable learning rate
    step = tf.placeholder(tf.int32)

    # flatten
    XX = tf.reshape(X, shape=[-1, 784])

    # Build the stack layer by layer. The last layer is checked first so
    # that a single-layer network (len(layers) == 2) still yields raw
    # logits instead of sigmoid activations.
    last = len(layers) - 1
    activation = XX
    for l in range(1, len(layers)):
        # Weights initialised with small random values between -0.2 and +0.2
        W = tf.Variable(
            tf.truncated_normal([layers[l - 1], layers[l]], stddev=0.1))
        b = tf.Variable(tf.ones([layers[l]]) / 10)
        pre_activation = tf.matmul(activation, W) + b
        if l == last:
            # no activation on the output layer: softmax is applied below
            activation = pre_activation
        else:
            activation = tf.nn.sigmoid(pre_activation)
    Ylogits = activation

    Y = tf.nn.softmax(Ylogits)

    # Loss is defined as cross entropy between the prediction and the real value
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
        logits=Ylogits, labels=Y_, name="lossFunction")
    cross_entropy = tf.reduce_mean(cross_entropy) * minibatch_size

    # accuracy of the trained model, between 0 (worst) and 1 (best)
    correct_prediction = tf.equal(tf.argmax(Y, 1), tf.argmax(Y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32),
                              name="accuracy")

    # training step
    # the learning rate is 0.0001 + 0.003 * (1/e)^(step/2000),
    # i.e. exponential decay from 0.003 -> 0.0001
    lr = 0.0001 + tf.train.exponential_decay(0.003, step, 2000, 1 / math.e)
    train_step = tf.train.AdamOptimizer(lr).minimize(cross_entropy)

    init = tf.global_variables_initializer()

    # recorded metrics
    train_indexes = []
    train_costs = []
    train_accuracies = []
    test_indexes = []
    test_costs = []
    test_accuracies = []

    sess = tf.Session()
    try:
        sess.run(init)

        # Feed the next batch and run the training
        for i in range(iterations):
            batch_X, batch_Y = mnist.train.next_batch(minibatch_size)

            # compute training values
            if i % 10 == 0:
                acc, cost = sess.run(
                    [accuracy, cross_entropy],
                    feed_dict={X: batch_X, Y_: batch_Y, step: i})
                train_indexes.append(i)
                train_costs.append(cost)
                train_accuracies.append(acc)
                print(str(i) + ": accuracy:" + str(acc) + " loss: " + str(cost))

            # compute test values
            if i % 50 == 0:
                acc, cost = sess.run(
                    [accuracy, cross_entropy],
                    feed_dict={X: mnist.test.images, Y_: mnist.test.labels})
                test_indexes.append(i)
                test_costs.append(cost)
                test_accuracies.append(acc)
                print(
                    str(i) + ": ********* epoch " +
                    str(i * minibatch_size // mnist.train.images.shape[0] + 1) +
                    " ********* test accuracy:" + str(acc) + " test loss: " +
                    str(cost))

            # the backpropagation training step
            sess.run(train_step, feed_dict={X: batch_X, Y_: batch_Y, step: i})

        print("max test accuracy: " + str(max(test_accuracies)))
    finally:
        # Release the session even if training is interrupted by an error.
        sess.close()

    # plot train and test costs and accuracies
    plt.figure(figsize=(12, 4))
    plt.subplot(121)
    plt.plot(np.squeeze(train_indexes), np.squeeze(train_accuracies),
             label="train_accuracy")
    plt.plot(np.squeeze(test_indexes), np.squeeze(test_accuracies),
             label="test_accuracy")
    plt.legend()
    plt.ylabel('accuracy')
    plt.xlabel('iterations')
    plt.title("Learning rate =" + str(learning_rate))

    plt.subplot(122)
    plt.plot(np.squeeze(train_indexes), np.squeeze(train_costs),
             label="train_costs")
    plt.plot(np.squeeze(test_indexes), np.squeeze(test_costs),
             label="test_costs")
    plt.legend()
    plt.ylabel('costs')
    plt.xlabel('iterations')
    plt.title("Learning rate =" + str(learning_rate))

    # savefig does not create missing directories.
    if not os.path.exists("output"):
        os.makedirs("output")
    plt.savefig("output/learning_rate " + str(learning_rate) + " iterations " +
                str(iterations) + " max test accuracy " +
                str(max(test_accuracies)) + " .png")
import math
import glog as log
import cPickle as pickle
import struct
import model

# Command-line flags.
FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string('dataset', 'omniglot', """mnist or omniglot""")
# NOTE(review): this is declared as a string flag but its default is the
# integer 0, so the value is an int by default and a str when passed on the
# command line; flag libraries that validate defaults may reject it.
# Confirm how gpu_id is consumed before changing the type.
tf.app.flags.DEFINE_string('gpu_id', 0, """which gpu to train on""")
tf.app.flags.DEFINE_string('save_dir', '.',
                           """directory to save the model in""")

if __name__ == "__main__":
    data = input_data.read_data_sets("MNIST_data/", one_hot=False)

    # Make sure the directory the model is saved to exists.
    model_save_path = FLAGS.save_dir
    if not os.path.exists(model_save_path):
        os.makedirs(model_save_path)

    # hyper-parameters
    c_num = 128
    batch_num = 50
    max_step = 25000
    test_batch_num = 1000  # to avoid out of memory
    test_cnt = len(data.test.images)

    log.info('training on %d images' % len(data.train.images))

    mnist_cnn = model.CNN(batch_norm_flag=True)
import tensorflow as tf import time from mnist import input_data mnist = input_data.read_data_sets('../mnist/MNIST_data/', one_hot=True) start_time = time.time() def weight_variable(shape): return tf.Variable(tf.truncated_normal(shape, stddev=0.1), name='weight') def bias_variable(shape): return tf.Variable(tf.constant(0.1, shape=shape),name='bias') def conv2d(x, W): return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME',name='conv2d') def max_pool_2x2(x): return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name='max_pool') # 定义数据 input_x = tf.placeholder('float', shape=[None, 784],name='input_x') input_y = tf.placeholder('float', shape=[None, 10],name='input_y') # 第一层,卷积层 L1 = { 'x': tf.reshape(input_x, [-1, 28, 28, 1],name='reshape28x28'),
def main(argv=None):
    """Load the MNIST dataset and hand it to ``train``.

    Args:
        argv: Ignored; present so the function can be used as an app entry
            point that receives the command-line arguments.
    """
    # The dataset wrapper is created here (the original comment notes that
    # it downloads the data on initialisation) and passed straight on.
    train(input_data.read_data_sets("MNIST_data/", one_hot=True))
#-*- coding:utf-8 -*-
# Data-loading module
from mnist import input_data

# Read MNIST from MNIST_data/; per the original note, the data is downloaded
# automatically when it is not already present.
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

# Print the image and label array shapes of every split, in order:
#   train:      (55000, 784) / (55000, 10)
#   validation: (5000, 784)  / (5000, 10)
#   test:       (10000, 784) / (10000, 10)
for split in (mnist.train, mnist.validation, mnist.test):
    print(split.images.shape)
    print(split.labels.shape)

# Vector representation of training image 0, followed by its label.
first_image = mnist.train.images[0, :]
first_label = mnist.train.labels[0, :]
print(first_image)
print(first_label)
# -*- coding: utf-8 -*-
# __author__ = "zok" [email protected]
# Date: 2019-10-13 Python: 3.7
import os

import tensorflow as tf

from mnist import model  # the regression model is defined there
from mnist.input_data import read_data_sets

# Load the MNIST dataset with one-hot labels.
data = read_data_sets('MNIST_data', one_hot=True)

# Build the model.
with tf.variable_scope("regression"):
    x = tf.placeholder(tf.float32, [None, 784])
    y, variables = model.regression(x)

# Training.
y_ = tf.placeholder("float", [None, 10])
# Guard the log against zero inputs, which would otherwise give -inf / NaN.
# Only a lower bound is applied because model.regression is not visible
# here (presumably y is a softmax output -- confirm).
cross_entropy = -tf.reduce_sum(y_ * tf.log(tf.maximum(y, 1e-10)))
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)

# Accuracy: fraction of examples whose arg-max prediction matches the label.
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Saver restricted to the variables returned by the model.
saver = tf.train.Saver(variables)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # 1000 training steps on mini-batches of 100 examples.
    for _ in range(1000):
        batch_xs, batch_ys = data.train.next_batch(100)
        sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})
def run_training():
    """Train MNIST for a number of steps.

    Builds the inference, loss, training and evaluation ops in a fresh
    graph and runs ``FLAGS.max_steps`` training steps. Every 100 steps the
    loss is printed and a summary is written; every 1000 steps and after
    the final step a checkpoint is saved and the model is evaluated on the
    training, validation and test sets.

    Configuration is read from the module-level ``FLAGS`` object
    (``input_data_dir``, ``fake_data``, ``batch_size``, ``hidden1``,
    ``hidden2``, ``learning_rate``, ``log_dir``, ``max_steps``).

    The session and the summary writer are closed when training finishes
    or raises, so their resources are always released.
    """
    # Get the sets of images and labels for training, validation, and
    # test on MNIST.
    data_sets = input_data.read_data_sets(FLAGS.input_data_dir, FLAGS.fake_data)

    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Graph().as_default():
        # Generate placeholders for the images and labels.
        images_placeholder, labels_placeholder = placeholder_inputs(
            FLAGS.batch_size)

        # Build a Graph that computes predictions from the inference model.
        logits = mnist.inference(images_placeholder, FLAGS.hidden1,
                                 FLAGS.hidden2)

        # Add to the Graph the Ops for loss calculation.
        loss = mnist.loss(logits, labels_placeholder)

        # Add to the Graph the Ops that calculate and apply gradients.
        train_op = mnist.training(loss, FLAGS.learning_rate)

        # Add the Op to compare the logits to the labels during evaluation.
        eval_correct = mnist.evaluation(logits, labels_placeholder)

        # Build the summary Tensor based on the TF collection of Summaries.
        summary = tf.summary.merge_all()

        # Add the variable initializer Op.
        init = tf.global_variables_initializer()

        # Create a saver for writing training checkpoints.
        saver = tf.train.Saver()

        # Create a session for running Ops on the Graph. The context manager
        # closes the session even if a training step raises.
        with tf.Session() as sess:
            # Instantiate a SummaryWriter to output summaries and the Graph.
            summary_writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph)
            try:
                # And then after everything is built:
                # Run the Op to initialize the variables.
                sess.run(init)

                # Start the training loop. `range` is used instead of
                # `xrange` so the loop does not depend on a Python 2 builtin
                # or a compatibility import.
                for step in range(FLAGS.max_steps):
                    start_time = time.time()

                    # Fill a feed dictionary with the actual set of images
                    # and labels for this particular training step.
                    feed_dict = fill_feed_dict(data_sets.train,
                                               images_placeholder,
                                               labels_placeholder)

                    # Run one step of the model. The return values are the
                    # activations from the `train_op` (which is discarded)
                    # and the `loss` Op. To inspect the values of your Ops
                    # or variables, you may include them in the list passed
                    # to sess.run() and the value tensors will be returned
                    # in the tuple from the call.
                    _, loss_value = sess.run([train_op, loss],
                                             feed_dict=feed_dict)

                    duration = time.time() - start_time

                    # Write the summaries and print an overview fairly often.
                    if step % 100 == 0:
                        # Print status to stdout.
                        print('Step %d: loss = %.2f (%.3f sec)' %
                              (step, loss_value, duration))
                        # Update the events file.
                        summary_str = sess.run(summary, feed_dict=feed_dict)
                        summary_writer.add_summary(summary_str, step)
                        summary_writer.flush()

                    # Save a checkpoint and evaluate the model periodically.
                    if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                        checkpoint_file = os.path.join(FLAGS.log_dir,
                                                       'model.ckpt')
                        saver.save(sess, checkpoint_file, global_step=step)
                        # Evaluate against the training set.
                        print('Training Data Eval:')
                        do_eval(sess, eval_correct, images_placeholder,
                                labels_placeholder, data_sets.train)
                        # Evaluate against the validation set.
                        print('Validation Data Eval:')
                        do_eval(sess, eval_correct, images_placeholder,
                                labels_placeholder, data_sets.validation)
                        # Evaluate against the test set.
                        print('Test Data Eval:')
                        do_eval(sess, eval_correct, images_placeholder,
                                labels_placeholder, data_sets.test)
            finally:
                # Flush pending events and release the event file handle.
                summary_writer.close()
def train1(learning_rate=0.005, minibatch_size=100, iterations=2000 + 1):
    """Train a single-layer softmax classifier on MNIST and plot its progress.

    The model is ``softmax(reshape(X, [-1, 784]) @ W + b)`` with ``W`` and
    ``b`` initialised to zeros, optimised with plain gradient descent.
    Training-batch accuracy and loss are recorded every 10 iterations and
    test-set accuracy and loss every 50 iterations; both are printed as they
    are recorded. After training, the best test accuracy is printed and the
    curves are saved as a PNG file under ``output/``.

    Args:
        learning_rate: Step size passed to the gradient-descent optimizer.
        minibatch_size: Number of training examples per step; also the
            factor the mean cross entropy is multiplied by.
        iterations: Number of training steps to run.

    Returns:
        None. Results are printed and written to the plot file.
    """
    # Load the MNIST data from the local "mnist" directory. reshape=False is
    # passed to the reader; the images are fed to a [None, 28, 28, 1]
    # placeholder below.
    mnist = input_data.read_data_sets("mnist", one_hot=True, reshape=False)

    # input X: 28x28 grayscale images, the first dimension (None) will index
    # the images in the mini-batch
    X = tf.placeholder(tf.float32, shape=[None, 28, 28, 1],
                       name="x_placeholder")
    # correct answers will go here
    Y_ = tf.placeholder(tf.float32, shape=[None, 10], name="y_placeholder")
    # weights W[784, 10] 784=28*28
    W = tf.Variable(tf.zeros([784, 10]), name="weights_variable")
    # biases b[10]
    b = tf.Variable(tf.zeros([10]), name="bias_variable")

    # flatten
    XX = tf.reshape(X, shape=[-1, 784])

    # The model
    Ylogits = tf.matmul(XX, W) + b
    Y = tf.nn.softmax(Ylogits)

    # Loss is the cross entropy between the prediction and the real value:
    # the mean over the fed examples, multiplied by minibatch_size.
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
        logits=Ylogits, labels=Y_, name="lossFunction")
    cross_entropy = tf.reduce_mean(cross_entropy) * minibatch_size

    # accuracy of the trained model, between 0 (worst) and 1 (best)
    correct_prediction = tf.equal(tf.argmax(Y, 1), tf.argmax(Y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32),
                              name="accuracy")

    # training step
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(
        cross_entropy, name="gradDescent")

    init = tf.global_variables_initializer()

    # recorded history: iteration index, cost and accuracy per measurement
    train_indexes, train_costs, train_accuracies = [], [], []
    test_indexes, test_costs, test_accuracies = [], [], []

    # Loop-invariant: number of training images, used for the epoch counter.
    train_size = mnist.train.images.shape[0]

    # The context manager closes the session even when a step raises.
    with tf.Session() as sess:
        sess.run(init)

        # Feed the next batch and run the training
        for i in range(iterations):
            batch_X, batch_Y = mnist.train.next_batch(minibatch_size)

            # compute training values
            if i % 10 == 0:
                acc, cost = sess.run([accuracy, cross_entropy],
                                     feed_dict={X: batch_X, Y_: batch_Y})
                train_indexes.append(i)
                train_costs.append(cost)
                train_accuracies.append(acc)
                print(str(i) + ": accuracy:" + str(acc) + " loss: " +
                      str(cost))

            # compute test values
            if i % 50 == 0:
                acc, cost = sess.run([accuracy, cross_entropy],
                                     feed_dict={X: mnist.test.images,
                                                Y_: mnist.test.labels})
                test_indexes.append(i)
                test_costs.append(cost)
                test_accuracies.append(acc)
                print(str(i) + ": ********* epoch " +
                      str(i * minibatch_size // train_size + 1) +
                      " ********* test accuracy:" + str(acc) +
                      " test loss: " + str(cost))

            # the backpropagation training step
            sess.run(train_step, feed_dict={X: batch_X, Y_: batch_Y})

    # With iterations <= 0 nothing was measured; report NaN instead of
    # letting max() raise on an empty list.
    if test_accuracies:
        max_test_accuracy = max(test_accuracies)
    else:
        max_test_accuracy = float("nan")
    print("max test accuracy: " + str(max_test_accuracy))

    _save_train1_plot(learning_rate, iterations,
                      (train_indexes, train_costs, train_accuracies),
                      (test_indexes, test_costs, test_accuracies),
                      max_test_accuracy)


def _save_train1_plot(learning_rate, iterations, train_history, test_history,
                      max_test_accuracy):
    """Plot accuracy and cost curves side by side and save them under output/.

    Args:
        learning_rate: Shown in the plot titles and the file name.
        iterations: Shown in the file name.
        train_history: Tuple ``(indexes, costs, accuracies)`` for training.
        test_history: Tuple ``(indexes, costs, accuracies)`` for testing.
        max_test_accuracy: Best test accuracy, shown in the file name.
    """
    # Local import so this block does not rely on a file-level `import os`.
    import os

    train_indexes, train_costs, train_accuracies = train_history
    test_indexes, test_costs, test_accuracies = test_history
    title = "Learning rate =" + str(learning_rate)

    plt.figure(figsize=(12, 4))

    # left panel: accuracy
    plt.subplot(121)
    plt.plot(np.squeeze(train_indexes), np.squeeze(train_accuracies),
             label="train_accuracy")
    plt.plot(np.squeeze(test_indexes), np.squeeze(test_accuracies),
             label="test_accuracy")
    plt.legend()
    plt.ylabel('accuracy')
    plt.xlabel('iterations')
    plt.title(title)

    # right panel: cost
    plt.subplot(122)
    plt.plot(np.squeeze(train_indexes), np.squeeze(train_costs),
             label="train_costs")
    plt.plot(np.squeeze(test_indexes), np.squeeze(test_costs),
             label="test_costs")
    plt.legend()
    plt.ylabel('costs')
    plt.xlabel('iterations')
    plt.title(title)

    # savefig does not create missing directories; without this a finished
    # training run would end in an error instead of a saved plot.
    if not os.path.isdir("output"):
        os.makedirs("output")
    plt.savefig("output/learning_rate " + str(learning_rate) +
                " iterations " + str(iterations) + " max test accuracy " +
                str(max_test_accuracy) + " .png")