def main(_):
    mnist = read_data_sets(FLAGS.data_dir, one_hot=True)

    x = tf.placeholder(tf.float32, [None, 784])
    W1 = tf.Variable(tf.random_normal([784, 256]))
    b1 = tf.Variable(tf.random_normal([256]))
    W2 = tf.Variable(tf.random_normal([256, 10]))
    b2 = tf.Variable(tf.random_normal([10]))
    lay1 = tf.nn.relu(tf.matmul(x, W1) + b1)
    y = tf.add(tf.matmul(lay1, W2), b2)

    y_ = tf.placeholder(tf.float32, [None, 10])
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=y, labels=y_))
    train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

    sess = tf.InteractiveSession()
    tf.global_variables_initializer().run()
    for index in range(10000):
        # print('process the {}th batch'.format(index))
        start_train = time.time()
        batch_xs, batch_ys = mnist.train.next_batch(100)
        sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})
        # print('the {0} batch takes time: {1}'.format(index, time.time() - start_train))

    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    print(sess.run(accuracy, feed_dict={x: mnist.test.images,
                                        y_: mnist.test.labels}))

def main(args):
    if args and args[0] == 'cnn':
        network_type = 'convolutional'
        reshape_data = False
        neural_network = network.ConvNet(max_epochs=1000)
        all_configurations = list(itertools.product(*configurations_cnn))  # create all possible combinations
        headers = ['1 layer', '2 layer', 'Epochs', 'Accuracy', 'Training time']
    else:
        network_type = 'simple'
        reshape_data = True
        neural_network = network.SimpleNet(max_epochs=1000)
        all_configurations = list(itertools.product(*configurations_simple_nn))  # create all possible combinations
        headers = ['Activation', 'Hidden neurons', 'Epochs', 'Accuracy', 'Training time']

    # --- setup logging to file and stdout
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.INFO)
    log_formatter = logging.Formatter("%(message)s")
    file_name = "mnist_" + network_type + "_log_{}".format(
        datetime.now().strftime("%Y%m%d-%H%M%S"))
    file_handler = logging.FileHandler("logs/{0}.log".format(file_name))
    file_handler.setFormatter(log_formatter)
    root_logger.addHandler(file_handler)
    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(log_formatter)
    root_logger.addHandler(stream_handler)

    # --- load MNIST data
    mnist = read_data_sets('.\\data', one_hot=True, reshape=reshape_data)
    root_logger.info('Loaded MNIST data!')
    root_logger.info(network_type.title() + ' neural network training starts:\n')

    # --- start training
    total_elapsed_time = 0.0
    best_accuracy = 0.0
    best_conf = None
    all_results = []
    for conf in all_configurations:
        neural_network.build(configuration=conf)
        stats = None
        while stats is None:
            stats = neural_network.train(data=mnist, checkpoints=checkpoints)
        elapsed_time = stats[-1][2]
        total_elapsed_time += elapsed_time
        for item in stats:
            all_results.append([conf[0], conf[1], item[0], item[1], item[2]])
        all_results.append([])
        accuracy = max(row[1] for row in stats)  # take the max value over all checkpoints
        print('Training finished. Configuration: {}. Accuracy: {:.4f}, Time: {:.1f} sec'
              .format(conf, accuracy, elapsed_time))
        if best_accuracy < accuracy:
            best_accuracy = accuracy
            best_conf = conf

    # --- log results to file
    root_logger.info(tabulate(all_results, headers=headers, floatfmt=".4f",
                              numalign='center', stralign='center'))
    root_logger.info('----------------------------------------------------------------------')
    root_logger.info('Best accuracy: {:.4f}, configuration: {}'.format(best_accuracy, best_conf))
    root_logger.info('Total elapsed time: {:.0f} minutes, {:.1f} seconds'
                     .format(total_elapsed_time // 60, total_elapsed_time % 60))
    root_logger.info('----------------------------------------------------------------------')
    logging.shutdown()

def main(argv):
    # Get the data.
    data_sets = mnist.read_data_sets(FLAGS.directory,
                                     dtype=tf.uint8,
                                     reshape=False)

    # Convert to Examples and write the result to TFRecords.
    convert_to(data_sets.train, "train")
    convert_to(data_sets.validation, "validation")
    convert_to(data_sets.test, "test")

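# convert_to is defined elsewhere in the original file. A minimal sketch,
# assuming the standard TensorFlow convert_to_records pattern, uint8 images
# (as read above with reshape=False), and FLAGS.directory as the output dir:
def convert_to(data_set, name):
    """Write a DataSet's images and labels to a TFRecords file (sketch)."""
    images = data_set.images
    labels = data_set.labels
    num_examples = data_set.num_examples
    rows, cols, depth = images.shape[1], images.shape[2], images.shape[3]

    filename = os.path.join(FLAGS.directory, name + '.tfrecords')
    print('Writing', filename)
    with tf.python_io.TFRecordWriter(filename) as writer:
        for index in range(num_examples):
            image_raw = images[index].tostring()
            example = tf.train.Example(features=tf.train.Features(feature={
                'height': tf.train.Feature(int64_list=tf.train.Int64List(value=[rows])),
                'width': tf.train.Feature(int64_list=tf.train.Int64List(value=[cols])),
                'depth': tf.train.Feature(int64_list=tf.train.Int64List(value=[depth])),
                'label': tf.train.Feature(int64_list=tf.train.Int64List(value=[int(labels[index])])),
                'image_raw': tf.train.Feature(bytes_list=tf.train.BytesList(value=[image_raw])),
            }))
            writer.write(example.SerializeToString())
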
def get_data():
    mnist = read_data_sets("../data/", one_hot=True, reshape=True,
                           validation_size=2000)
    X_train = mnist.train.images
    X_val = mnist.validation.images
    Y_train = mnist.train.labels
    Y_val = mnist.validation.labels
    # print each split's shape once (the original printed X_train.shape twice)
    print(X_train.shape, X_val.shape, Y_val.shape, Y_train.shape)
    return X_train, X_val, Y_val, Y_train

def main(_):  # _: unused_argv
    # data_sets has class type: Dataset
    data_sets = mnist.read_data_sets(cfg.FLAGS.directory,
                                     dtype=tf.uint8,
                                     reshape=False,
                                     validation_size=cfg.FLAGS.validation_size)
    convert_to_tfrecords(data_sets.train, 'train')
    convert_to_tfrecords(data_sets.validation, 'valid')
    convert_to_tfrecords(data_sets.test, 'test')

def main(argv):
    # (a leftover `import pdb; pdb.set_trace()` debugging breakpoint was removed here)
    # Get the data.
    data_sets = mnist.read_data_sets(FLAGS.directory,
                                     dtype=tf.uint8,
                                     reshape=False)

    # Convert to Examples and write the result to TFRecords.
    convert_to(data_sets.train, 'train')
    convert_to(data_sets.validation, 'validation')
    convert_to(data_sets.test, 'test')

def maybe_download_and_convert(data_dir):
    # Get the data.
    data_sets = mnist.read_data_sets(data_dir, dtype=tf.uint8, reshape=False)

    # Convert to Examples and write the result to TFRecords.
    filenames = [
        os.path.join(data_dir, name + '.tfrecords')
        for name in ['train', 'validation', 'test']
    ]
    for idx, f in enumerate(filenames):
        if not tf.gfile.Exists(f):
            convert_to_tfr(data_sets[idx], f)

def main(unused_argv):
    # Get the data.
    data_sets = mnist.read_data_sets(FLAGS.directory,
                                     dtype=tf.uint8,
                                     reshape=False,
                                     validation_size=FLAGS.validation_size)

    # Convert to Examples and write the result to TFRecords.
    convert_to(data_sets.train, 'train')
    convert_to(data_sets.validation, 'validation')
    convert_to(data_sets.test, 'test')

def main():
    mnist = read_data_sets('/tmp/tensorflow/mnist/input_data', one_hot=True)

    size = 28
    n = size * size
    k = 10
    x = tf.placeholder(tf.float32, (None, n))
    y = tf.placeholder(tf.float32, (None, k))
    keep_prob = tf.placeholder(tf.float32)
    optimizer, z = create_cnn(size, n, k, x, y, keep_prob)
    # optimizer, z = create_perceptron(size, n, k, x, y, keep_prob)

    session = tf.Session()
    session.run(tf.global_variables_initializer())
    for i in range(1000):
        print(i)
        x_data, y_data = mnist.train.next_batch(100)
        session.run(optimizer, {x: x_data, y: y_data, keep_prob: 0.5})

    # x_data, y_data = mnist.test.images, mnist.test.labels
    correct = tf.equal(tf.argmax(y, 1), tf.argmax(z, 1))
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    result = session.run(accuracy, {x: x_data, y: y_data, keep_prob: 1.0})
    print(result)

    def draw(image):
        # print a 28x28 ASCII rendering of one flattened image
        print('--' * 28)
        for i in range(28):
            offset = i * 28
            lines = image[offset:offset + 28]
            print(''.join(['%2d' % (j * 10) for j in lines]))

    z_data = session.run(tf.argmax(z, 1), {x: x_data, y: y_data, keep_prob: 1.0})
    corrects = session.run(correct, {x: x_data, y: y_data, keep_prob: 1.0})
    count = 3
    for i in range(len(corrects)):
        if corrects[i]:
            continue
        draw(x_data[i])
        print('right: %d / predict: %d' % (list(y_data[i]).index(1), z_data[i]))
        count -= 1
        if count == 0:  # was `count is 0`: identity check, not a value check
            break

def build_input_pipeline(data_dir, batch_size, heldout_size, mnist_type):
    """Builds an Iterator switching between train and heldout data."""
    # Build an iterator over training batches.
    if mnist_type in [MnistType.FAKE_DATA, MnistType.THRESHOLD]:
        if mnist_type == MnistType.FAKE_DATA:
            mnist_data = build_fake_data()
        else:
            mnist_data = mnist.read_data_sets(data_dir)
        training_dataset = tf.data.Dataset.from_tensor_slices(
            (mnist_data.train.images, np.int32(mnist_data.train.labels)))
        heldout_dataset = tf.data.Dataset.from_tensor_slices(
            (mnist_data.validation.images,
             np.int32(mnist_data.validation.labels)))
    elif mnist_type == MnistType.BERNOULLI:
        training_dataset = load_bernoulli_mnist_dataset(data_dir, "train")
        heldout_dataset = load_bernoulli_mnist_dataset(data_dir, "valid")
    else:
        raise ValueError("Unknown MNIST type.")

    training_batches = training_dataset.repeat().batch(batch_size)
    training_iterator = training_batches.make_one_shot_iterator()

    # Build an iterator over the heldout set with batch_size=heldout_size,
    # i.e., return the entire heldout set as a constant.
    heldout_frozen = (heldout_dataset.take(heldout_size)
                      .repeat().batch(heldout_size))
    heldout_iterator = heldout_frozen.make_one_shot_iterator()

    # Combine these into a feedable iterator that can switch between training
    # and validation inputs.
    handle = tf.placeholder(tf.string, shape=[])
    feedable_iterator = tf.data.Iterator.from_string_handle(
        handle, training_batches.output_types, training_batches.output_shapes)
    images, labels = feedable_iterator.get_next()

    # Reshape as a pixel image and binarize pixels.
    images = tf.reshape(images, shape=[-1] + IMAGE_SHAPE)
    if mnist_type in [MnistType.FAKE_DATA, MnistType.THRESHOLD]:
        images = tf.cast(images > 0.5, dtype=tf.int32)
    return images, labels, handle, training_iterator, heldout_iterator

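# A brief usage sketch (assumed; it mirrors how a training loop drives this
# pipeline): the string handle produced by each iterator is fed into the
# `handle` placeholder so each sess.run call picks its data source.
images, labels, handle, training_iterator, heldout_iterator = \
    build_input_pipeline(data_dir, batch_size=128, heldout_size=10000,
                         mnist_type=MnistType.THRESHOLD)
with tf.Session() as sess:
    train_handle = sess.run(training_iterator.string_handle())
    heldout_handle = sess.run(heldout_iterator.string_handle())
    # pull one training batch, then the frozen heldout set
    train_imgs, train_lbls = sess.run([images, labels],
                                      feed_dict={handle: train_handle})
    held_imgs, held_lbls = sess.run([images, labels],
                                    feed_dict={handle: heldout_handle})
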
def main(_):
    print("Downloading and reading data sets...")
    mnist = read_data_sets(FLAGS.data_dir, one_hot=True)

    # Create the model
    x = tf.placeholder(tf.float32, [None, 784])
    W = tf.Variable(tf.zeros([784, 10]))
    b = tf.Variable(tf.zeros([10]))
    y = tf.matmul(x, W) + b
    y_ = tf.placeholder(tf.float32, [None, 10])

    # Define loss and optimizer
    # The raw formulation of cross-entropy,
    #   tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(tf.softmax(y)),
    #                                 reduction_indices=[1]))
    # can be numerically unstable. So here we use
    # tf.nn.softmax_cross_entropy_with_logits on the raw outputs of 'y',
    # and then average across the batch.
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=y, labels=y_))
    train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

    sess = tf.InteractiveSession()
    tf.initialize_all_variables().run()

    # Train
    print("training for %s steps" % FLAGS.num_steps)
    for _ in xrange(FLAGS.num_steps):
        batch_xs, batch_ys = mnist.train.next_batch(100)
        sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

    # Test trained model
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    print("accuracy: %s " % sess.run(accuracy,
                                     feed_dict={x: mnist.test.images,
                                                y_: mnist.test.labels}))

def main(_):
    # Import data
    mnist = read_data_sets('/tmp/tensorflow/mnist/input_data', one_hot=True)

    x = tf.placeholder(tf.float32, [None, 784])
    W = tf.Variable(tf.zeros([784, 10]))
    b = tf.Variable(tf.zeros([10]))
    y = tf.matmul(x, W) + b
    y_ = tf.placeholder(tf.float32, [None, 10])

    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y))
    trainer = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        for _ in range(1000):
            batch_xs, batch_ys = mnist.train.next_batch(100)
            sess.run(trainer, feed_dict={x: batch_xs, y_: batch_ys})

        correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        print(sess.run(accuracy, feed_dict={x: mnist.test.images,
                                            y_: mnist.test.labels}))

def run():
    # evaluate accuracy, for test and valid sets
    def evaluation(images, true_labels):
        eval_batch_size = 100
        predicted_labels = []
        # evaluate in batch sizes of 100
        for start_index in range(0, len(images), eval_batch_size):
            end_index = start_index + eval_batch_size
            batch_x = images[start_index:end_index]
            batch_predicted_labels = sess.run(prediction, feed_dict={
                x: batch_x,
                is_training: False
            })
            predicted_labels += list(batch_predicted_labels)
        predicted_labels = np.vstack(predicted_labels).flatten()
        true_labels = true_labels.flatten()
        accuracy = float((predicted_labels == true_labels).astype(
            np.int32).sum()) / len(images)
        # return predicted labels and the accuracy in comparison to the true labels
        return predicted_labels, accuracy

    # set iteration parameters
    EPOCHS = 10
    BATCH_SIZE = 64
    NUM_ITERS = int(55000 / BATCH_SIZE * EPOCHS)

    # get MNIST dataset
    mnist = read_data_sets('data', one_hot=False)
    x_train, y_train = (mnist.train._images,
                        mnist.train._labels.reshape((-1, 1)))
    x_test, y_test = (mnist.test._images,
                      mnist.test.labels.reshape((-1, 1)))
    x_valid, y_valid = (mnist.validation._images,
                        mnist.validation._labels.reshape((-1, 1)))

    # use convenient class for iterating over MNIST dataset randomly
    train_set = DatasetIterator(x_train, y_train, BATCH_SIZE)
    test_set = DatasetIterator(x_test, y_test, BATCH_SIZE)
    valid_set = DatasetIterator(x_valid, y_valid, BATCH_SIZE)

    tf.reset_default_graph()
    x = tf.placeholder(tf.float32, (None, 784))
    y = tf.placeholder(tf.int32, (None, 1))
    is_labeled = tf.placeholder(tf.float32, (None, 1))
    is_training = tf.placeholder(tf.bool, ())
    one_hot_y = tf.one_hot(y, 10)

    # training variables
    rate = 0.001
    recon, logits = AutoEncoder(x, is_training=is_training)
    prediction = tf.argmax(logits, axis=1)
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                labels=one_hot_y) * is_labeled)
    # loss between the output of the AutoEncoder and the original input tensor
    recon_loss = tf.reduce_mean((recon - x) ** 2)
    loss_operation = cross_entropy + recon_loss
    optimizer = tf.train.AdamOptimizer(learning_rate=rate)
    grads_and_vars = optimizer.compute_gradients(
        loss_operation, tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES))
    training_operation = optimizer.apply_gradients(grads_and_vars)

    # train
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    print("Training...")
    for i in range(NUM_ITERS):
        # get next batch
        batch_x, batch_y, batch_is_labeled = train_set.next_batch()
        # train on this batch
        _ = sess.run(training_operation, feed_dict={
            x: batch_x,
            y: batch_y,
            is_labeled: batch_is_labeled,
            is_training: True
        })
        if (i + 1) % 1000 == 0 or i == NUM_ITERS - 1:
            # validate
            _, validation_accuracy = evaluation(valid_set.x, valid_set.y)
            print("Iter {}: Validation Accuracy = {:.3f}".format(
                i, validation_accuracy))

    # test
    print('Evaluating on test set')
    _, test_accuracy = evaluation(test_set.x, test_set.y)
    print("Test Accuracy = {:.3f}".format(test_accuracy))
    sess.close()
    return test_accuracy

def compute_y(x):
    # X1 = tf.placeholder(dtype='float', shape=[None, 392])
    # X2 = tf.placeholder(dtype='float', shape=[None, 392])
    X1, X2 = splitX(x)
    with tf.variable_scope("share_weight") as scope:
        out1 = X_W(X1)
        # allow variable sharing between the two halves
        scope.reuse_variables()
        out2 = X_W(X2)
    y = tf.nn.softmax(out1 + out2 + b)  # predicted value
    return y

mnist = read_data_sets("data/", one_hot=True)

x = tf.placeholder(dtype='float', shape=[None, 784])
b = tf.Variable(tf.zeros([10]))
y = compute_y(x)
y_ = tf.placeholder(dtype='float', shape=[None, 10])  # ground-truth value

# compute the cross-entropy
cross_entropy = -tf.reduce_sum(y_ * tf.log(y))
train_step = tf.train.GradientDescentOptimizer(
    learning_rate=0.01).minimize(cross_entropy)

init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
step = 500

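# splitX and X_W are helpers defined elsewhere. A minimal sketch consistent
# with the shared-weight usage above (the 784-pixel input is split into two
# 392-pixel halves that multiply one [392, 10] matrix; tf.get_variable plus
# scope.reuse_variables() is what shares it):
def splitX(x):
    # split the 784-dim input into two 392-dim halves along the feature axis
    return tf.split(x, num_or_size_splits=2, axis=1)

def X_W(x_half):
    # one [392, 10] weight matrix, shared across the two calls
    # under the reused variable scope
    W = tf.get_variable("W", shape=[392, 10],
                        initializer=tf.zeros_initializer())
    return tf.matmul(x_half, W)
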
def main(_):
    """Build the full graph for feeding inputs, training, and saving checkpoints.
    Run the training. Then, load the saved graph and run some predictions."""
    # Get input data: get the sets of images and labels for training,
    # validation, and test on MNIST.
    data_sets = read_data_sets(FLAGS.data_dir, False)

    mnist_graph = tf.Graph()
    with mnist_graph.as_default():
        # Generate placeholders for the images and labels.
        images_placeholder = tf.placeholder(tf.float32)
        labels_placeholder = tf.placeholder(tf.int32)
        tf.add_to_collection("images", images_placeholder)  # Remember this Op.
        tf.add_to_collection("labels", labels_placeholder)  # Remember this Op.

        # Build a Graph that computes predictions from the inference model.
        logits = mnist_inference(images_placeholder, HIDDEN1_UNITS, HIDDEN2_UNITS)
        tf.add_to_collection("logits", logits)  # Remember this Op.

        # Add to the Graph the Ops that calculate and apply gradients.
        train_op, loss = mnist_training(logits, labels_placeholder, 0.01)

        # prediction accuracy
        _, indices_op = tf.nn.top_k(logits)
        flattened = tf.reshape(indices_op, [-1])
        correct_prediction = tf.cast(tf.equal(labels_placeholder, flattened),
                                     tf.float32)
        accuracy = tf.reduce_mean(correct_prediction)

        # Define info to be used by the SummaryWriter. This will let
        # TensorBoard plot values during the training process.
        loss_summary = tf.scalar_summary("loss", loss)
        acc_summary = tf.scalar_summary("accuracy", accuracy)
        train_summary_op = tf.merge_summary([loss_summary, acc_summary])

        # Add the variable initializer Op.
        init = tf.initialize_all_variables()

        # Create a saver for writing training checkpoints.
        saver = tf.train.Saver()

        # Create a summary writer.
        print("Writing Summaries to %s" % FLAGS.model_dir)
        train_summary_writer = tf.train.SummaryWriter(FLAGS.model_dir)

        # Uncomment the following line to see what we have constructed.
        # tf.train.write_graph(tf.get_default_graph().as_graph_def(),
        #                      "/tmp", "complete.pbtxt", as_text=True)

    # Run training for MAX_STEPS and save checkpoint at the end.
    with tf.Session(graph=mnist_graph) as sess:
        # Run the Op to initialize the variables.
        sess.run(init)

        # Start the training loop.
        for step in xrange(FLAGS.num_steps):
            # Read a batch of images and labels.
            images_feed, labels_feed = data_sets.train.next_batch(BATCH_SIZE)

            # Run one step of the model. The return values are the activations
            # from the `train_op` (which is discarded) and the `loss` Op. To
            # inspect the values of your Ops or variables, you may include them
            # in the list passed to sess.run() and the value tensors will be
            # returned in the tuple from the call.
            _, loss_value, tsummary, acc = sess.run(
                [train_op, loss, train_summary_op, accuracy],
                feed_dict={images_placeholder: images_feed,
                           labels_placeholder: labels_feed})
            if step % 100 == 0:
                # Write summary info
                train_summary_writer.add_summary(tsummary, step)
            if step % 1000 == 0:
                # Print loss/accuracy info
                print('----Step %d: loss = %.4f' % (step, loss_value))
                print("accuracy: %s" % acc)

        print("\nWriting checkpoint file.")
        checkpoint_file = os.path.join(FLAGS.model_dir, 'checkpoint')
        saver.save(sess, checkpoint_file, global_step=step)
        _, loss_value = sess.run(
            [train_op, loss],
            feed_dict={images_placeholder: data_sets.test.images,
                       labels_placeholder: data_sets.test.labels})
        print("Test set loss: %s" % loss_value)

    # Run evaluation based on the saved checkpoint.
    with tf.Session(graph=tf.Graph()) as sess:
        checkpoint_file = tf.train.latest_checkpoint(FLAGS.model_dir)
        print("\nRunning evaluation based on saved checkpoint.")
        print("checkpoint file: {}".format(checkpoint_file))

        # Load the saved meta graph and restore variables
        saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
        saver.restore(sess, checkpoint_file)

        # Retrieve the Ops we 'remembered'.
        logits = tf.get_collection("logits")[0]
        images_placeholder = tf.get_collection("images")[0]
        labels_placeholder = tf.get_collection("labels")[0]

        # Add an Op that chooses the top k predictions.
        eval_op = tf.nn.top_k(logits)

        # Run evaluation.
        images_feed, labels_feed = data_sets.validation.next_batch(
            EVAL_BATCH_SIZE)
        prediction = sess.run(eval_op,
                              feed_dict={images_placeholder: images_feed,
                                         labels_placeholder: labels_feed})
        for i in range(len(labels_feed)):
            print("Ground truth: %d\nPrediction: %d" %
                  (labels_feed[i], prediction.indices[i][0]))

# \x/x\x/x\x/x\x/x\x/ -- fully connected layer (softmax)   W [784, 10]   b[10]
#   · · · · · · · ·                                        Y [batch, 10]

# The model is:
#
#   Y = softmax( X * W + b)
#
#   X: matrix for 100 grayscale images of 28x28 pixels, flattened
#      (there are 100 images in a mini-batch)
#   W: weight matrix with 784 lines and 10 columns
#   b: bias vector with 10 dimensions
#   +: add with broadcasting: adds the vector to each line of the matrix (numpy)
#   softmax(matrix) applies softmax on each line
#   softmax(line) applies an exp to each value then divides by the norm of the resulting line
#   Y: output matrix with 100 lines and 10 columns

# Download images and labels into mnist.test (10K images+labels)
# and mnist.train (60K images+labels)
mnist = read_data_sets("data", one_hot=True, reshape=False)

# input X: 28x28 grayscale images, the first dimension (None) will index
# the images in the mini-batch
X = tf.placeholder(tf.float32, [None, 28, 28, 1])
# correct answers will go here
Y_ = tf.placeholder(tf.float32, [None, 10])
# weights W[784, 10]   784 = 28 * 28
W = tf.Variable(tf.zeros([784, 10]))
# biases b[10]
b = tf.Variable(tf.zeros([10]))

# flatten the images into a single line of pixels
# -1 in the shape definition means "the only possible dimension that will
# preserve the number of elements"
XX = tf.reshape(X, [-1, 784])

# The model
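# The excerpt stops at "# The model". Given the formula Y = softmax(X * W + b)
# spelled out above, the line that presumably follows is (assumed continuation,
# not shown in the excerpt):
Y = tf.nn.softmax(tf.matmul(XX, W) + b)
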
# digits and create a simple CNN network to predict the
# digit category (0-9)
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow.contrib.learn.python.learn.datasets.mnist import read_data_sets
from tensorflow.python.framework import ops
ops.reset_default_graph()

# Start a graph session
sess = tf.Session()

# Load data
data_dir = 'temp'
mnist = read_data_sets(data_dir)

# Convert images into 28x28 (they are downloaded as 1x784)
train_xdata = np.array([np.reshape(x, (28, 28)) for x in mnist.train.images])
test_xdata = np.array([np.reshape(x, (28, 28)) for x in mnist.test.images])

# Keep labels as integer class indices (one_hot defaults to False above)
train_labels = mnist.train.labels
test_labels = mnist.test.labels

# Set model parameters
batch_size = 100
learning_rate = 0.005
evaluation_size = 500
image_width = train_xdata[0].shape[0]
image_height = train_xdata[0].shape[1]

# Accuracy in the model for the iteration
accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))

# Gradient descent
# Partial derivatives are calculated automatically
optimizer = tf.train.GradientDescentOptimizer(0.003)
# Size of the gradient step for each iteration - it's currently constant
train_step = optimizer.minimize(cross_entropy)

# We need to start the session so all the lazy setup gets executed
sess = tf.Session()
sess.run(init)

# Get the images for the actual test
from tensorflow.contrib.learn.python.learn.datasets.mnist import read_data_sets
mnist = read_data_sets("data", one_hot=True, reshape=False, validation_size=0)

# We begin the actual computation
for i in range(1000):
    # We load the images in the placeholders
    batch_X, batch_Y = mnist.train.next_batch(100)
    # And we create our results dictionary

    # Updating training data
    train_data = {X: batch_X, Y_: batch_Y}
    # Run one gradient-descent step (without this, the model never trains)
    sess.run(train_step, feed_dict=train_data)
    a, c = sess.run([accuracy, cross_entropy], feed_dict=train_data)
    print(str(i) + ": accuracy:" + str(a) + " loss: " + str(c))

    # Updating test data
    test_data = {X: mnist.test.images, Y_: mnist.test.labels}

def main(argv):
    del argv  # unused
    if tf.gfile.Exists(FLAGS.model_dir):
        tf.logging.warning(
            "Warning: deleting old log directory at {}".format(FLAGS.model_dir))
        tf.gfile.DeleteRecursively(FLAGS.model_dir)
    tf.gfile.MakeDirs(FLAGS.model_dir)

    if FLAGS.fake_data:
        mnist_data = build_fake_data()
    else:
        mnist_data = mnist.read_data_sets(FLAGS.data_dir, reshape=False)

    with tf.Graph().as_default():
        (images, labels, handle,
         training_iterator, heldout_iterator) = build_input_pipeline(
             mnist_data, FLAGS.batch_size, mnist_data.validation.num_examples)

        # Build a Bayesian LeNet5 network. We use the Flipout Monte Carlo estimator
        # for the convolution and fully-connected layers: this enables lower
        # variance stochastic gradients than naive reparameterization.
        with tf.name_scope("bayesian_neural_net", values=[images]):
            neural_net = tf.keras.Sequential([
                tfp.layers.Convolution2DFlipout(6,
                                                kernel_size=5,
                                                padding="SAME",
                                                activation=tf.nn.relu),
                tf.keras.layers.MaxPooling2D(pool_size=[2, 2],
                                             strides=[2, 2],
                                             padding="SAME"),
                tfp.layers.Convolution2DFlipout(16,
                                                kernel_size=5,
                                                padding="SAME",
                                                activation=tf.nn.relu),
                tf.keras.layers.MaxPooling2D(pool_size=[2, 2],
                                             strides=[2, 2],
                                             padding="SAME"),
                tfp.layers.Convolution2DFlipout(120,
                                                kernel_size=5,
                                                padding="SAME",
                                                activation=tf.nn.relu),
                tf.keras.layers.Flatten(),
                tfp.layers.DenseFlipout(84, activation=tf.nn.relu),
                tfp.layers.DenseFlipout(10)
            ])

        logits = neural_net(images)
        labels_distribution = tfd.Categorical(logits=logits)

        # Compute the -ELBO as the loss, averaged over the batch size.
        neg_log_likelihood = -tf.reduce_mean(labels_distribution.log_prob(labels))
        kl = sum(neural_net.losses) / mnist_data.train.num_examples
        elbo_loss = neg_log_likelihood + kl

        # Build metrics for evaluation. Predictions are formed from a single
        # forward pass of the probabilistic layers. They are cheap but noisy
        # predictions.
        predictions = tf.argmax(logits, axis=1)
        accuracy, accuracy_update_op = tf.metrics.accuracy(
            labels=labels, predictions=predictions)

        # Extract weight posterior statistics for layers with weight
        # distributions for later visualization.
        names = []
        qmeans = []
        qstds = []
        for i, layer in enumerate(neural_net.layers):
            try:
                q = layer.kernel_posterior
            except AttributeError:
                continue
            names.append("Layer {}".format(i))
            qmeans.append(q.mean())
            qstds.append(q.stddev())

        with tf.name_scope("train"):
            opt = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate)
            train_op = opt.minimize(elbo_loss)

        sess = tf.Session()
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())

        # Run the training loop.
        train_handle = sess.run(training_iterator.string_handle())
        heldout_handle = sess.run(heldout_iterator.string_handle())
        for step in range(FLAGS.max_steps):
            _ = sess.run([train_op, accuracy_update_op],
                         feed_dict={handle: train_handle})

            if step % 100 == 0:
                loss_value, accuracy_value = sess.run(
                    [elbo_loss, accuracy], feed_dict={handle: train_handle})
                print("Step: {:>3d} Loss: {:.3f} Accuracy: {:.3f}".format(
                    step, loss_value, accuracy_value))

            if (step + 1) % FLAGS.viz_steps == 0:
                # Compute log prob of heldout set by averaging draws from the model:
                # p(heldout | train) = int_model p(heldout|model) p(model|train)
                #                   ~= 1/n * sum_{i=1}^n p(heldout | model_i)
                # where model_i is a draw from the posterior p(model|train).
                probs = np.asarray([
                    sess.run(labels_distribution.probs,
                             feed_dict={handle: heldout_handle})
                    for _ in range(FLAGS.num_monte_carlo)
                ])
                mean_probs = np.mean(probs, axis=0)

                image_vals, label_vals = sess.run(
                    (images, labels), feed_dict={handle: heldout_handle})
                heldout_lp = np.mean(np.log(
                    mean_probs[np.arange(mean_probs.shape[0]),
                               label_vals.flatten()]))
                print(" ... Held-out nats: {:.3f}".format(heldout_lp))

                qm_vals, qs_vals = sess.run((qmeans, qstds))

                if HAS_SEABORN:
                    plot_weight_posteriors(
                        names, qm_vals, qs_vals,
                        fname=os.path.join(
                            FLAGS.model_dir,
                            "step{:05d}_weights.png".format(step)))
                    plot_heldout_prediction(
                        image_vals, probs,
                        fname=os.path.join(
                            FLAGS.model_dir,
                            "step{:05d}_pred.png".format(step)),
                        title="mean heldout logprob {:.2f}".format(heldout_lp))

import os
import shutil
import uuid

import tensorflow as tf
from tensorflow.contrib.learn.python.learn.datasets import mnist
import numpy as np


def write_tfrecords(tfrecords_path, dataset):
    with tf.python_io.TFRecordWriter(tfrecords_path) as record_writer:
        for image, label in zip(dataset.images, dataset.labels):
            example = tf.train.Example(features=tf.train.Features(feature={
                # "image": tf.train.Feature(bytes_list=tf.train.BytesList(value=[image.tostring()])),
                "image": tf.train.Feature(
                    float_list=tf.train.FloatList(
                        value=image.flatten().astype(np.float32))),
                "label": tf.train.Feature(
                    int64_list=tf.train.Int64List(value=[int(label)]))
            }))
            record_writer.write(example.SerializeToString())


dataset_dir = "data/"
compressed_files_dir = str(uuid.uuid4())
datasets = mnist.read_data_sets(compressed_files_dir,
                                dtype=tf.uint8,
                                reshape=True,
                                validation_size=10000)
write_tfrecords(os.path.join(dataset_dir, "mnist.train"), datasets.train)
write_tfrecords(os.path.join(dataset_dir, "mnist.eval"), datasets.validation)
write_tfrecords(os.path.join(dataset_dir, "mnist.test"), datasets.test)
shutil.rmtree(compressed_files_dir)

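# A hedged sketch of how these files could be read back (assumed, not part of
# the original script): each record stores the image as a 784-float list and
# the label as a single int64, so the matching parser is:
def parse_mnist_record(serialized):
    features = tf.parse_single_example(serialized, features={
        "image": tf.FixedLenFeature([784], tf.float32),
        "label": tf.FixedLenFeature([], tf.int64),
    })
    return features["image"], features["label"]

train_dataset = tf.data.TFRecordDataset("data/mnist.train").map(parse_mnist_record)
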
# for deep learning course in NCTU 2017
# if you have questions, mail to [email protected]
# the code demonstrates the NIN arch on MNIST
# It achieved 0.41% test error (the original paper reported 0.47% test error)
from __future__ import print_function
import tensorflow as tf
from tensorflow.contrib.learn.python.learn.datasets.mnist import read_data_sets

mnist = read_data_sets('MNIST_data', one_hot=True)


# define functions
def weight_variable(shape):
    initial = tf.random_normal(shape, stddev=0.05, dtype=tf.float32)
    return tf.Variable(initial)


def bias_variable(shape):
    initial = tf.constant(0, shape=shape, dtype=tf.float32)
    return tf.Variable(initial)


def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')


def max_pool_3x3(x):
    return tf.nn.max_pool(x, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
                          padding='SAME')


# define placeholders
x = tf.placeholder(tf.float32, [None, 784])
y_ = tf.placeholder(tf.float32, [None, 10])
keep_prob = tf.placeholder(tf.float32)

def main(argv=None):
    data = read_data_sets('MNIST_data', one_hot=True)
    evaluate(data)

            dtype=tf.float32, name='biases')
        self.params = [self.weights1, self.biases1,
                       self.weights2, self.biases2,
                       self.sm_w, self.sm_b]

    def __call__(self, images):
        """Run the model's forward prop on `images`."""
        hidden1 = tf.nn.relu(tf.matmul(images, self.weights1) + self.biases1)
        hidden2 = tf.nn.relu(tf.matmul(hidden1, self.weights2) + self.biases2)
        logits = tf.matmul(hidden2, self.sm_w) + self.sm_b
        return logits


model = Model(128, 32)
data = read_data_sets('/tmp/mnist_train')


def get_test_accuracy():
    """Gets the model's classification accuracy on test data."""
    num_examples = data.test.num_examples
    # use integer division so np.split gets an int section count under Python 3
    test_images = np.split(data.test.images, num_examples // BATCH_SIZE)
    test_labels = np.split(data.test.labels.astype(np.int32),
                           num_examples // BATCH_SIZE)
    num_correct = 0
    for _, (images, labels) in enumerate(zip(test_images, test_labels)):
        with tf.new_step():
            logits = model(images)
            predictions = tf.argmax(tf.nn.softmax(logits), axis=1)
            num_correct += np.sum(predictions.value == labels)
    return float(num_correct) / float(num_examples)

def train():
    # Train a Variational Autoencoder on MNIST

    # Input placeholders
    with tf.name_scope('data'):
        x = tf.placeholder(tf.float32, [None, 28, 28, 1])
        tf.summary.image('data', x)

    with tf.variable_scope('variational'):
        q_mu, q_sigma = inference_network(x=x,
                                          latent_dim=FLAGS.latent_dim,
                                          hidden_size=FLAGS.hidden_size)
        with st.value_type(st.SampleValue()):
            # The variational distribution is a Normal with mean and standard
            # deviation given by the inference network
            q_z = st.StochasticTensor(
                distributions.Normal(mu=q_mu, sigma=q_sigma))

    with tf.variable_scope('model'):
        # The likelihood is Bernoulli-distributed with logits given by the
        # generative network
        p_x_given_z_logits = generative_network(z=q_z,
                                                hidden_size=FLAGS.hidden_size)
        p_x_given_z = distributions.Bernoulli(logits=p_x_given_z_logits)
        posterior_predictive_samples = p_x_given_z.sample()
        tf.summary.image('posterior_predictive',
                         tf.cast(posterior_predictive_samples, tf.float32))

    # Take samples from the prior
    with tf.variable_scope('model', reuse=True):
        p_z = distributions.Normal(
            mu=np.zeros(FLAGS.latent_dim, dtype=np.float32),
            sigma=np.ones(FLAGS.latent_dim, dtype=np.float32))
        p_z_sample = p_z.sample_n(FLAGS.n_samples)
        p_x_given_z_logits = generative_network(z=p_z_sample,
                                                hidden_size=FLAGS.hidden_size)
        prior_predictive = distributions.Bernoulli(logits=p_x_given_z_logits)
        prior_predictive_samples = prior_predictive.sample()
        tf.summary.image('prior_predictive',
                         tf.cast(prior_predictive_samples, tf.float32))

    # Take samples from the prior with a placeholder
    with tf.variable_scope('model', reuse=True):
        z_input = tf.placeholder(tf.float32, [None, FLAGS.latent_dim])
        p_x_given_z_logits = generative_network(z=z_input,
                                                hidden_size=FLAGS.hidden_size)
        prior_predictive_inp = distributions.Bernoulli(logits=p_x_given_z_logits)
        prior_predictive_inp_sample = prior_predictive_inp.sample()

    # Build the evidence lower bound (ELBO) or the negative loss
    kl = tf.reduce_sum(distributions.kl(q_z.distribution, p_z), 1)
    expected_log_likelihood = tf.reduce_sum(p_x_given_z.log_pmf(x), [1, 2, 3])
    elbo = tf.reduce_sum(expected_log_likelihood - kl, 0)

    optimizer = tf.train.RMSPropOptimizer(learning_rate=0.001)
    train_op = optimizer.minimize(-elbo)

    # Merge all the summaries
    summary_op = tf.summary.merge_all()

    init_op = tf.global_variables_initializer()

    # Run training
    sess = tf.InteractiveSession()
    sess.run(init_op)
    mnist = read_data_sets(FLAGS.data_dir, one_hot=True)

    print('Saving TensorBoard summaries and images to: %s' % FLAGS.logdir)
    train_writer = tf.summary.FileWriter(FLAGS.logdir, sess.graph)

    # Get fixed MNIST digits for plotting posterior means during training
    np_x_fixed, np_y = mnist.test.next_batch(5000)
    np_x_fixed = np_x_fixed.reshape(5000, 28, 28, 1)
    np_x_fixed = (np_x_fixed > 0.5).astype(np.float32)

    for i in range(FLAGS.n_iterations):
        # Re-binarize the data at every batch; this improves results
        np_x, _ = mnist.train.next_batch(FLAGS.batch_size)
        np_x = np_x.reshape(FLAGS.batch_size, 28, 28, 1)
        np_x = (np_x > 0.5).astype(np.float32)
        sess.run(train_op, {x: np_x})

        # Print progress and save samples every so often
        t0 = time.time()
        if i % FLAGS.print_every == 0:
            np_elbo, summary_str = sess.run([elbo, summary_op], {x: np_x})
            train_writer.add_summary(summary_str, i)
            print('Iteration: {0:d} ELBO: {1:.3f} Examples/s: {2:.3e}'.format(
                i,
                np_elbo / FLAGS.batch_size,
                FLAGS.batch_size * FLAGS.print_every / (time.time() - t0)))
            t0 = time.time()

            # Save samples
            np_posterior_samples, np_prior_samples = sess.run(
                [posterior_predictive_samples, prior_predictive_samples],
                {x: np_x})
            for k in range(FLAGS.n_samples):
                f_name = os.path.join(
                    FLAGS.logdir,
                    'iter_%d_posterior_predictive_%d_data.jpg' % (i, k))
                imsave(f_name, np_x[k, :, :, 0])
                f_name = os.path.join(
                    FLAGS.logdir,
                    'iter_%d_posterior_predictive_%d_sample.jpg' % (i, k))
                imsave(f_name, np_posterior_samples[k, :, :, 0])
                f_name = os.path.join(
                    FLAGS.logdir,
                    'iter_%d_prior_predictive_%d.jpg' % (i, k))
                imsave(f_name, np_prior_samples[k, :, :, 0])

            # Plot the posterior predictive space
            if FLAGS.latent_dim == 2:
                np_q_mu = sess.run(q_mu, {x: np_x_fixed})
                cmap = mpl.colors.ListedColormap(sns.color_palette("husl"))
                f, ax = plt.subplots(1, figsize=(6 * 1.1618, 6))
                im = ax.scatter(np_q_mu[:, 0], np_q_mu[:, 1],
                                c=np.argmax(np_y, 1), cmap=cmap, alpha=0.7)
                ax.set_xlabel('First dimension of sampled latent variable $z_1$')
                ax.set_ylabel('Second dimension of sampled latent variable mean $z_2$')
                ax.set_xlim([-10., 10.])
                ax.set_ylim([-10., 10.])
                f.colorbar(im, ax=ax, label='Digit class')
                plt.tight_layout()
                plt.savefig(os.path.join(
                    FLAGS.logdir, 'posterior_predictive_map_frame_%d.png' % i))
                plt.close()

                nx = ny = 20
                x_values = np.linspace(-3, 3, nx)
                y_values = np.linspace(-3, 3, ny)
                canvas = np.empty((28 * ny, 28 * nx))
                for ii, yi in enumerate(x_values):
                    for j, xi in enumerate(y_values):
                        np_z = np.array([[xi, yi]])
                        x_mean = sess.run(prior_predictive_inp_sample,
                                          {z_input: np_z})
                        canvas[(nx - ii - 1) * 28:(nx - ii) * 28,
                               j * 28:(j + 1) * 28] = x_mean[0].reshape(28, 28)
                imsave(os.path.join(
                    FLAGS.logdir, 'prior_predictive_map_frame_%d.png' % i),
                    canvas)
                # plt.figure(figsize=(8, 10))
                # Xi, Yi = np.meshgrid(x_values, y_values)
                # plt.imshow(canvas, origin="upper")
                # plt.tight_layout()
                # plt.savefig()

    # Make the gifs
    if FLAGS.latent_dim == 2:
        os.system(
            'convert -delay 15 -loop 0 {0}/posterior_predictive_map_frame*png '
            '{0}/posterior_predictive.gif'.format(FLAGS.logdir))
        os.system(
            'convert -delay 15 -loop 0 {0}/prior_predictive_map_frame*png '
            '{0}/prior_predictive.gif'.format(FLAGS.logdir))

'''
A Recurrent Neural Network (LSTM) implementation example using the TensorFlow
library. This example uses the MNIST database of handwritten digits
(http://yann.lecun.com/exdb/mnist/)

Long Short Term Memory paper: http://deeplearning.cs.cmu.edu/pdfs/Hochreiter97_lstm.pdf

Author: Aymeric Damien
Project: https://github.com/aymericdamien/TensorFlow-Examples/
'''

import tensorflow as tf
from tensorflow.contrib.learn.python.learn.datasets.mnist import read_data_sets

mnist = read_data_sets("/tmp/MNIST/", one_hot=True)

'''
To classify images using a recurrent neural network, we consider every image
row as a sequence of pixels. Because the MNIST image shape is 28*28px, we will
then handle 28 sequences of 28 steps for every sample.
'''

# Parameters
learning_rate = 0.001
training_iters = 1000
batch_size = 128
display_step = 10

# Network Parameters
n_input = 28  # MNIST data input (img shape: 28*28)
n_steps = 28  # timesteps
n_hidden = 128  # hidden layer num of features
n_classes = 10  # MNIST total classes (0-9 digits)

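# The graph these parameters configure is not shown in the excerpt; a minimal
# sketch following the TensorFlow-Examples recurrent_network pattern (assumed
# continuation, not the original file's exact code):
x = tf.placeholder(tf.float32, [None, n_steps, n_input])
y = tf.placeholder(tf.float32, [None, n_classes])

# output projection from the last LSTM output to class logits
weights = tf.Variable(tf.random_normal([n_hidden, n_classes]))
biases = tf.Variable(tf.random_normal([n_classes]))

def RNN(x, weights, biases):
    # unstack so each 28-pixel image row becomes one timestep
    x = tf.unstack(x, n_steps, 1)
    lstm_cell = tf.contrib.rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)
    outputs, states = tf.contrib.rnn.static_rnn(lstm_cell, x, dtype=tf.float32)
    # classify using the output of the final timestep
    return tf.matmul(outputs[-1], weights) + biases
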
min_after_dequeue = 3000

# If `enqueue_many` is `False`, `tensors` is assumed to represent a
# single example. An input tensor with shape `[x, y, z]` will be output
# as a tensor with shape `[batch_size, x, y, z]`.
#
# If `enqueue_many` is `True`, `tensors` is assumed to represent a
# batch of examples, where the first dimension is indexed by example,
# and all members of `tensors` should have the same size in the
# first dimension. If an input tensor has shape `[*, x, y, z]`, the
# output will have shape `[batch_size, x, y, z]`.
enqueue_many = True

cache_dir = os.path.expanduser(
    os.path.join('~', '.keras', 'datasets', 'MNIST-data'))
data = mnist.read_data_sets(cache_dir, validation_size=0)

x_train_batch, y_train_batch = tf.train.shuffle_batch(
    tensors=[data.train.images, data.train.labels.astype(np.int32)],
    batch_size=batch_size,
    capacity=capacity,
    min_after_dequeue=min_after_dequeue,
    enqueue_many=enqueue_many,
    num_threads=8)

x_train_batch = tf.cast(x_train_batch, tf.float32)
x_train_batch = tf.reshape(x_train_batch, shape=batch_shape)

y_train_batch = tf.cast(y_train_batch, tf.int32)
y_train_batch = tf.one_hot(y_train_batch, num_classes)

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import math
import tensorflow as tf
from tensorflow.contrib.learn.python.learn.datasets.mnist import read_data_sets

mnist = read_data_sets("data/", one_hot=True, reshape=False)

tf.set_random_seed(10)

X = tf.placeholder(tf.float32, [None, 28, 28, 1])
Y_ = tf.placeholder(tf.float32, [None, 10])
lr = tf.placeholder(tf.float32)

# output channel depths of the three conv layers, and size of the dense layer
K = 4
L = 8
M = 12
N = 200

W1 = tf.Variable(tf.truncated_normal([5, 5, 1, K], stddev=0.1))
B1 = tf.Variable(tf.ones([K]) / 10)
W2 = tf.Variable(tf.truncated_normal([5, 5, K, L], stddev=0.1))
B2 = tf.Variable(tf.ones([L]) / 10)
W3 = tf.Variable(tf.truncated_normal([4, 4, L, M], stddev=0.1))
B3 = tf.Variable(tf.ones([M]) / 10)
W4 = tf.Variable(tf.truncated_normal([7 * 7 * M, N], stddev=0.1))
B4 = tf.Variable(tf.ones([N]) / 10)

    G_loss = tf.reduce_mean(-D_fake)

    tf.summary.scalar('D_loss', D_loss)
    tf.summary.scalar('G_loss', G_loss)

    D_solver = tf.train.RMSPropOptimizer(learning_rate).minimize(
        D_loss, var_list=D_weights)
    G_solver = tf.train.RMSPropOptimizer(learning_rate2).minimize(
        G_loss, global_step=global_step, var_list=G_weights)

    # clip critic weights to [-0.01, 0.01] (the WGAN Lipschitz constraint)
    clip_weights = [
        tf.assign(w, tf.clip_by_value(w, -0.01, 0.01)) for w in D_weights
    ]

    return X, D_solver, G_solver, clip_weights, D_loss, G_out, Z_


mnist = read_data_sets(input_data_dir, fake_data=False)

graph = tf.Graph()
sess = tf.Session(graph=graph)

# If we're training the model for the first time.
if restore == False:
    with graph.as_default():
        X, D_s, G_s, clip, D_l, G_out, Z_ = build_graph()
        summary = tf.summary.merge_all()
        init = tf.global_variables_initializer()
        saver = tf.train.Saver(max_to_keep=3)
        # Add these values (tensors) to collection to facilitate model restore.
        tf.add_to_collection("X", X)
        tf.add_to_collection("D_s", D_s)
        tf.add_to_collection("G_s", G_s)
        for t in clip:

def training(
        mnist_image_pixels=IMAGE_PIXELS,
        batch_size=100,
        num_hidden1_units=128,
        num_hidden2_units=32,
        num_classes=10,
        learning_rate=.01,
        train_dir='data',
        max_steps=2000,
):
    data_sets = read_data_sets(train_dir)

    images_placeholder = tf.placeholder(tf.float32,
                                        shape=(batch_size, mnist_image_pixels))
    labels_placeholder = tf.placeholder(
        # tf.int32,
        tf.int64,
        shape=(batch_size))

    with tf.name_scope('hidden1'):
        W, b = get_W_b(mnist_image_pixels, num_hidden1_units)
        hidden1 = tf.nn.relu(tf.matmul(images_placeholder, W) + b)
    with tf.name_scope('hidden2'):
        W, b = get_W_b(num_hidden1_units, num_hidden2_units)
        hidden2 = tf.nn.relu(tf.matmul(hidden1, W) + b)
    with tf.name_scope('softmax_linear'):
        W, b = get_W_b(num_hidden2_units, num_classes)
        logits = tf.matmul(hidden2, W) + b

    # labels_placeholder = tf.to_int64(labels_placeholder)
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=labels_placeholder, name='xentropy')
    loss = tf.reduce_mean(cross_entropy, name='xentropy_mean')
    tf.scalar_summary(loss.op.name, loss)

    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    global_step = tf.Variable(0, name='global_step', trainable=False)
    train_op = optimizer.minimize(loss, global_step=global_step)

    correct = tf.nn.in_top_k(logits, labels_placeholder, 1)
    eval_correct = tf.reduce_sum(tf.cast(correct, tf.int32))

    summary = tf.merge_all_summaries()
    init = tf.initialize_all_variables()
    saver = tf.train.Saver()
    sess = tf.Session()
    summary_writer = tf.train.SummaryWriter(train_dir, sess.graph)
    sess.run(init)

    for step in range(max_steps):
        start_time = time.time()
        feed_dict = get_feed_dict(data_sets.train, images_placeholder,
                                  labels_placeholder, batch_size)
        _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)
        duration = time.time() - start_time

        if step % 100 == 0:
            print('Step {:d}: loss = {:.2f} ({:.3f} sec)'.format(
                step, loss_value, duration))
            # Update the events file.
            summary_str = sess.run(summary, feed_dict=feed_dict)
            summary_writer.add_summary(summary_str, step)
            summary_writer.flush()

        if (step + 1) % 1000 == 0 or (step + 1) == max_steps:
            checkpoint_file = os.path.join(train_dir, 'checkpoint')
            saver.save(sess, checkpoint_file, global_step=step)
            print('Training Data Eval:')
            do_eval(sess, eval_correct, images_placeholder,
                    labels_placeholder, data_sets.train, batch_size)
            print('Validation Data Eval:')
            do_eval(sess, eval_correct, images_placeholder,
                    labels_placeholder, data_sets.validation, batch_size)
            print('Test Data Eval:')
            do_eval(sess, eval_correct, images_placeholder,
                    labels_placeholder, data_sets.test, batch_size)

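# get_W_b, get_feed_dict and do_eval are defined elsewhere in the original
# file. A plausible minimal sketch of get_W_b, assuming the standard
# MNIST-tutorial initialization implied by the layer structure above
# (requires `import math`):
def get_W_b(num_in, num_out):
    # truncated-normal weights scaled by 1/sqrt(fan-in), zero biases
    W = tf.Variable(
        tf.truncated_normal([num_in, num_out],
                            stddev=1.0 / math.sqrt(float(num_in))),
        name='weights')
    b = tf.Variable(tf.zeros([num_out]), name='biases')
    return W, b
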
@author: tomhope
"""
from __future__ import print_function
import os
import tensorflow as tf
from tensorflow.contrib.learn.python.learn.datasets import mnist
import numpy as np

save_dir = "D:\\mnist"

# Download data to save_dir
data_sets = mnist.read_data_sets(save_dir,
                                 dtype=tf.uint8,
                                 reshape=False,
                                 validation_size=1000)

data_splits = ["train", "test", "validation"]
for d in range(len(data_splits)):
    print("saving " + data_splits[d])
    data_set = data_sets[d]

    filename = os.path.join(save_dir, data_splits[d] + '.tfrecords')
    writer = tf.python_io.TFRecordWriter(filename)
    for index in range(data_set.images.shape[0]):
        image = data_set.images[index].tostring()
        example = tf.train.Example(features=tf.train.Features(
            feature={
                'height': tf.train.Feature(
                    int64_list=tf.train.Int64List(
def main(_):
    # create the cluster configured by `ps_hosts' and 'worker_hosts'
    cluster = tf.train.ClusterSpec(clusterSpec)

    # create a server for the local task
    server = tf.train.Server(cluster, job_name=jobName, task_index=taskIndex)

    if jobName == "ps":
        server.join()  # ps hosts only join
    elif jobName == "worker":
        # workers perform the operation
        # ps_strategy = tf.contrib.training.GreedyLoadBalancingStrategy(FLAGS.num_ps)

        # Note: tf.train.replica_device_setter automatically places the parameters
        # (Variables) on the ps hosts (default placement strategy: round-robin over
        # all ps hosts), and also places copies of the operations on each worker host
        with tf.device(tf.train.replica_device_setter(
                worker_device="/job:worker/task:%d" % (taskIndex),
                cluster=cluster)):
            # load mnist dataset
            mnist = read_data_sets("./dataset", one_hot=True)

            # the model
            images = tf.placeholder(tf.float32, [None, 784])
            labels = tf.placeholder(tf.int32, [None, 10])
            logits = model(images)
            loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                        labels=labels))

            # The StopAtStepHook handles stopping after running given steps.
            hooks = [tf.train.StopAtStepHook(last_step=2000)]
            global_step = tf.train.get_or_create_global_step()
            optimizer = tf.train.AdamOptimizer(learning_rate=1e-04)

            if FLAGS.is_sync:
                # synchronous training:
                # wrap the optimizer with tf.train.SyncReplicasOptimizer
                # ref: https://www.tensorflow.org/api_docs/python/tf/train/SyncReplicasOptimizer
                optimizer = tf.train.SyncReplicasOptimizer(
                    optimizer,
                    replicas_to_aggregate=FLAGS.num_workers,
                    total_num_replicas=FLAGS.num_workers)
                # create the hook which handles initialization and queues
                hooks.append(optimizer.make_session_run_hook(FLAGS.task_index == 0))

            train_op = optimizer.minimize(
                loss,
                global_step=global_step,
                aggregation_method=tf.AggregationMethod.ADD_N)

        # The MonitoredTrainingSession takes care of session initialization,
        # restoring from a checkpoint, saving to a checkpoint, and closing when
        # done or an error occurs.
        with tf.train.MonitoredTrainingSession(master=server.target,
                                               is_chief=(FLAGS.task_index == 0),
                                               checkpoint_dir="./checkpoint_dir",
                                               hooks=hooks) as mon_sess:
            while not mon_sess.should_stop():
                # mon_sess.run handles AbortedError in case of preempted PS.
                img_batch, label_batch = mnist.train.next_batch(32)
                _, ls, step = mon_sess.run([train_op, loss, global_step],
                                           feed_dict={images: img_batch,
                                                      labels: label_batch})
                if step % 100 == 0:
                    print("Train step %d, loss: %f" % (step, ls))

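# model is defined elsewhere; any classifier mapping the [None, 784] images
# placeholder to [None, 10] logits fits here. A minimal assumed sketch (the
# real model may differ):
def model(images):
    # hypothetical single-hidden-layer classifier
    hidden = tf.layers.dense(images, 128, activation=tf.nn.relu)
    return tf.layers.dense(hidden, 10)
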
def run_training():
    """Train MNIST for a number of steps."""
    # Get the sets of images and labels for training, validation, and
    # test on MNIST.
    data_sets = read_data_sets(FLAGS.train_dir, FLAGS.fake_data)

    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Graph().as_default():
        # Generate placeholders for the images and labels.
        images_placeholder, labels_placeholder = placeholder_inputs(FLAGS.batch_size)

        # Build a Graph that computes predictions from the inference model.
        logits = inference(images_placeholder, FLAGS.hidden1, FLAGS.hidden2)

        # Add to the Graph the Ops for loss calculation.
        loss = loss_funct(logits, labels_placeholder)

        # Add to the Graph the Ops that calculate and apply gradients.
        train_op = training(loss, FLAGS.learning_rate)

        # Add the Op to compare the logits to the labels during evaluation.
        eval_correct = evaluation(logits, labels_placeholder)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.merge_all_summaries()

        # Add the variable initializer Op.
        init = tf.initialize_all_variables()

        # Create a saver for writing training checkpoints.
        saver = tf.train.Saver()

        # Create a session for running Ops on the Graph.
        sess = tf.Session()

        # Instantiate a SummaryWriter to output summaries and the Graph.
        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph)

        # Run the Op to initialize the variables.
        sess.run(init)

        # Start the training loop.
        for step in xrange(FLAGS.max_steps):
            start_time = time.time()

            # Fill a feed dictionary with the actual set of images and labels
            # for this particular training step.
            feed_dict = fill_feed_dict(data_sets.train,
                                       images_placeholder,
                                       labels_placeholder)

            # Run one step of the model. The return values are the activations
            # from the `train_op` (which is discarded) and the `loss` Op. To
            # inspect the values of your Ops or variables, you may include them
            # in the list passed to sess.run() and the value tensors will be
            # returned in the tuple from the call.
            _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)

            duration = time.time() - start_time

            # Write the summaries and print an overview fairly often.
            if step % 100 == 0:
                # Print status to stdout.
                print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration))
                # Update the events file.
                summary_str = sess.run(summary_op, feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, step)
                summary_writer.flush()

            # Save a checkpoint and evaluate the model periodically.
            if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                saver.save(sess, FLAGS.train_dir, global_step=step)
                # Evaluate against the training set.
                print('Training Data Eval:')
                do_eval(sess, eval_correct, images_placeholder,
                        labels_placeholder, data_sets.train)
                # Evaluate against the validation set.
                print('Validation Data Eval:')
                do_eval(sess, eval_correct, images_placeholder,
                        labels_placeholder, data_sets.validation)
                # Evaluate against the test set.
                print('Test Data Eval:')
                do_eval(sess, eval_correct, images_placeholder,
                        labels_placeholder, data_sets.test)

BATCH_SIZE = 100
EVAL_BATCH_SIZE = 1

# Number of units in hidden layers.
HIDDEN1_UNITS = 128
HIDDEN2_UNITS = 32

# Maximum number of training steps.
MAX_STEPS = 2000

# Directory to put the training data.
TRAIN_DIR = "/Users/ruichen/Documents/Algorithms/DL/TensorFlowTutorials/tmp/mnist"

# 2.3 Get input data: get the sets of images and labels for training,
# validation, and test on MNIST.
data_sets = read_data_sets(TRAIN_DIR, False)


# 2.4 Build inference graph.
def mnist_inference(images, hidden1_units, hidden2_units):
    """Build the MNIST model up to where it may be used for inference.

    Args:
        images: Images placeholder.
        hidden1_units: Size of the first hidden layer.
        hidden2_units: Size of the second hidden layer.

    Returns:
        logits: Output tensor with the computed logits.
    """
    # Hidden 1
    with tf.name_scope('hidden1'):
        weights = tf.Variable(
            tf.truncated_normal([IMAGE_PIXELS, hidden1_units],
                                stddev=1.0 / math.sqrt(float(IMAGE_PIXELS))),
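            name='weights')
        # NOTE: the excerpt is truncated above; the rest of this function is an
        # assumed sketch following the canonical two-hidden-layer MNIST
        # inference pattern (NUM_CLASSES = 10 is assumed to be defined).
        biases = tf.Variable(tf.zeros([hidden1_units]), name='biases')
        hidden1 = tf.nn.relu(tf.matmul(images, weights) + biases)
    # Hidden 2
    with tf.name_scope('hidden2'):
        weights = tf.Variable(
            tf.truncated_normal([hidden1_units, hidden2_units],
                                stddev=1.0 / math.sqrt(float(hidden1_units))),
            name='weights')
        biases = tf.Variable(tf.zeros([hidden2_units]), name='biases')
        hidden2 = tf.nn.relu(tf.matmul(hidden1, weights) + biases)
    # Linear output layer producing the logits
    with tf.name_scope('softmax_linear'):
        weights = tf.Variable(
            tf.truncated_normal([hidden2_units, NUM_CLASSES],
                                stddev=1.0 / math.sqrt(float(hidden2_units))),
            name='weights')
        biases = tf.Variable(tf.zeros([NUM_CLASSES]), name='biases')
        logits = tf.matmul(hidden2, weights) + biases
    return logits
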
def main(unused_args):
    assert len(unused_args) == 1, unused_args
    setup_experiment(logging, FLAGS, "critic_model")

    if FLAGS.validation:
        mnist_ds = mnist.read_data_sets(FLAGS.data_dir,
                                        dtype=tf.float32,
                                        reshape=False,
                                        validation_size=0)
        val_ds = mnist_ds.test
    else:
        mnist_ds = mnist.read_data_sets(FLAGS.data_dir,
                                        dtype=tf.float32,
                                        reshape=False,
                                        validation_size=FLAGS.validation_size)
        val_ds = mnist_ds.validation
    # (a redundant `val_ds = mnist_ds.validation` that clobbered the
    # FLAGS.validation branch above was removed here)
    train_ds = mnist_ds.train
    test_ds = mnist_ds.test
    num_classes = FLAGS.num_classes

    img_shape = [None, 1, 28, 28]
    X = tf.placeholder(tf.float32, shape=img_shape, name='X')
    # placeholder to avoid recomputation of adversarial images for critic
    X_hat_h = tf.placeholder(tf.float32, shape=img_shape, name='X_hat')
    y = tf.placeholder(tf.int32, shape=[None], name='y')
    y_onehot = tf.one_hot(y, num_classes)
    reduce_ind = list(range(1, X.get_shape().ndims))

    # test/validation inputs
    X_v = tf.placeholder(tf.float32, shape=img_shape, name='X_v')
    y_v = tf.placeholder(tf.int32, shape=[None], name='y_v')
    y_v_onehot = tf.one_hot(y_v, num_classes)

    # classifier model
    model = create_model(FLAGS, name=FLAGS.model_name)

    def test_model(x, **kwargs):
        return model(x, train=False, **kwargs)

    # generator
    def generator(inputs, confidence, targets=None):
        return high_confidence_attack_unrolled(
            lambda x: model(x)['logits'],
            inputs,
            targets=targets,
            confidence=confidence,
            max_iter=FLAGS.attack_iter,
            over_shoot=FLAGS.attack_overshoot,
            attack_random=FLAGS.attack_random,
            attack_uniform=FLAGS.attack_uniform,
            attack_label_smoothing=FLAGS.attack_label_smoothing)

    def test_generator(inputs, confidence, targets=None):
        return high_confidence_attack(
            lambda x: test_model(x)['logits'],
            inputs,
            targets=targets,
            confidence=confidence,
            max_iter=FLAGS.df_iter,
            over_shoot=FLAGS.df_overshoot,
            random=FLAGS.attack_random,
            uniform=FLAGS.attack_uniform,
            clip_dist=FLAGS.df_clip)

    # discriminator
    critic = create_model(FLAGS, prefix='critic_', name='critic')

    # classifier outputs
    outs_x = model(X)
    outs_x_v = test_model(X_v)
    params = tf.trainable_variables()
    model_weights = [param for param in params if "weights" in param.name]
    vars = tf.model_variables()

    target_conf_v = [None]
    if FLAGS.attack_confidence == "same":
        # set the target confidence to the confidence of the original prediction
        target_confidence = outs_x['conf']
        target_conf_v[0] = target_confidence
    elif FLAGS.attack_confidence == "class_running_mean":
        # set the target confidence to the mean confidence of the specific
        # target; use a running mean estimate
        class_conf_mean = tf.Variable(np.ones(num_classes, dtype=np.float32))
        batch_conf_mean = tf.unsorted_segment_mean(outs_x['conf'],
                                                   outs_x['pred'],
                                                   num_classes)
        # if the batch does not contain predictions for the specific target
        # (zeroes), replace zeroes with the stored class mean (previous batch)
        batch_conf_mean = tf.where(tf.not_equal(batch_conf_mean, 0),
                                   batch_conf_mean, class_conf_mean)
        # update class confidence mean
        class_conf_mean = assign_moving_average(class_conf_mean,
                                                batch_conf_mean, 0.5)
        # init class confidence during pre-training
        tf.add_to_collection("PREINIT_OPS", class_conf_mean)

        def target_confidence(targets_onehot):
            targets = tf.argmax(targets_onehot, axis=1)
            check_conf = tf.Assert(
                tf.reduce_all(tf.not_equal(class_conf_mean, 0)),
                [class_conf_mean])
            with tf.control_dependencies([check_conf]):
                t = tf.gather(class_conf_mean, targets)
            target_conf_v[0] = t
            return tf.stop_gradient(t)
    else:
        target_confidence = float(FLAGS.attack_confidence)
        target_conf_v[0] = target_confidence

    X_hat = generator(X, target_confidence)
    outs_x_hat = model(X_hat)
    # select examples for which the attack succeeded (changed the prediction)
    X_hat_filter = tf.not_equal(outs_x['pred'], outs_x_hat['pred'])
    X_hat_f = tf.boolean_mask(X_hat, X_hat_filter)
    X_f = tf.boolean_mask(X, X_hat_filter)

    outs_x_f = model(X_f)
    outs_x_hat_f = model(X_hat_f)

    X_hatd = tf.stop_gradient(X_hat)
    X_rec = generator(X_hatd, outs_x['conf'], outs_x['pred'])
    X_rec_f = tf.boolean_mask(X_rec, X_hat_filter)

    # validation/test adversarial examples
    X_v_hat = test_generator(X_v, FLAGS.val_attack_confidence)
    X_v_hatd = tf.stop_gradient(X_v_hat)
    X_v_rec = test_generator(X_v_hatd, outs_x_v['conf'],
                             targets=outs_x_v['pred'])
    X_v_hat_df = deepfool(lambda x: test_model(x)['logits'],
                          X_v, y_v,
                          max_iter=FLAGS.df_iter,
                          clip_dist=FLAGS.df_clip)
    X_v_hat_df_all = deepfool(lambda x: test_model(x)['logits'],
                              X_v,
                              max_iter=FLAGS.df_iter,
                              clip_dist=FLAGS.df_clip)

    y_hat = outs_x['pred']
    y_adv = outs_x_hat['pred']
    y_adv_f = outs_x_hat_f['pred']
    tf.summary.histogram('y_data', y, collections=["model_summaries"])
    tf.summary.histogram('y_hat', y_hat, collections=["model_summaries"])
    tf.summary.histogram('y_adv', y_adv, collections=["model_summaries"])

    # critic outputs
    critic_outs_x = critic(X)
    critic_outs_x_hat = critic(X_hat_f)
    critic_params = list(set(tf.trainable_variables()) - set(params))
    critic_vars = list(set(tf.trainable_variables()) - set(vars))

    # binary logits for a specific target
    logits_data = critic_outs_x['logits']
    logits_data_flt = tf.reshape(logits_data, (-1,))
    z_data = tf.gather(logits_data_flt,
                       tf.range(tf.shape(X)[0]) * num_classes + y)
    logits_adv = critic_outs_x_hat['logits']
    logits_adv_flt = tf.reshape(logits_adv, (-1,))
    z_adv = tf.gather(logits_adv_flt,
                      tf.range(tf.shape(X_hat_f)[0]) * num_classes + y_adv_f)

    # classifier/generator losses
    nll = tf.reduce_mean(
        tf.losses.softmax_cross_entropy(y_onehot, outs_x['logits']))
    nll_v = tf.reduce_mean(
        tf.losses.softmax_cross_entropy(y_v_onehot, outs_x_v['logits']))
    # gan losses
    gan = tf.losses.sigmoid_cross_entropy(tf.ones_like(z_adv), z_adv)
    rec_l1 = tf.reduce_mean(
        tf.reduce_sum(tf.abs(X_f - X_rec_f), axis=reduce_ind))
    rec_l2 = tf.reduce_mean(
        tf.reduce_sum((X_f - X_rec_f)**2, axis=reduce_ind))
    weight_decay = slim.apply_regularization(slim.l2_regularizer(1.0),
                                             model_weights[:-1])

    pretrain_loss = nll + 5e-6 * weight_decay
    loss = nll + FLAGS.lmbd * gan
    if FLAGS.lmbd_rec_l1 > 0:
        loss += FLAGS.lmbd_rec_l1 * rec_l1
    if FLAGS.lmbd_rec_l2 > 0:
        loss += FLAGS.lmbd_rec_l2 * rec_l2
    if FLAGS.weight_decay > 0:
        loss += FLAGS.weight_decay * weight_decay

    # critic loss
    critic_gan_data = tf.losses.sigmoid_cross_entropy(tf.ones_like(z_data),
                                                      z_data)
    # use placeholder for X_hat to avoid recomputation of adversarial noise
    y_adv_h = model(X_hat_h)['pred']
    logits_adv_h = critic(X_hat_h)['logits']
    logits_adv_flt_h = tf.reshape(logits_adv_h, (-1,))
    z_adv_h = tf.gather(logits_adv_flt_h,
                        tf.range(tf.shape(X_hat_h)[0]) * num_classes + y_adv_h)
    critic_gan_adv = tf.losses.sigmoid_cross_entropy(tf.zeros_like(z_adv_h),
                                                     z_adv_h)
    critic_gan = critic_gan_data + critic_gan_adv

    # Gulrajani discriminator regularizer (we do not interpolate)
    critic_grad_data = tf.gradients(z_data, X)[0]
    critic_grad_adv = tf.gradients(z_adv_h, X_hat_h)[0]
    critic_grad_penalty = (norm_penalty(critic_grad_adv) +
                           norm_penalty(critic_grad_data))
    critic_loss = critic_gan + FLAGS.lmbd_grad * critic_grad_penalty

    # classifier model_metrics
    err = 1 - slim.metrics.accuracy(outs_x['pred'], y)
    conf = tf.reduce_mean(outs_x['conf'])
    err_hat = 1 - slim.metrics.accuracy(
        test_model(X_hat)['pred'], outs_x['pred'])
    err_hat_f = 1 - slim.metrics.accuracy(
        test_model(X_hat_f)['pred'], outs_x_f['pred'])
    err_rec = 1 - slim.metrics.accuracy(
        test_model(X_rec)['pred'], outs_x['pred'])
    conf_hat = tf.reduce_mean(test_model(X_hat)['conf'])
    conf_hat_f = tf.reduce_mean(test_model(X_hat_f)['conf'])
    conf_rec = tf.reduce_mean(test_model(X_rec)['conf'])
    err_v = 1 - slim.metrics.accuracy(outs_x_v['pred'], y_v)
    conf_v_hat = tf.reduce_mean(test_model(X_v_hat)['conf'])
    l2_hat = tf.sqrt(tf.reduce_sum((X_f - X_hat_f)**2, axis=reduce_ind))
    tf.summary.histogram('l2_hat', l2_hat, collections=["model_summaries"])

    # critic model_metrics
    critic_err_data = 1 - binary_accuracy(
        z_data, tf.ones(tf.shape(z_data), tf.bool), 0.0)
    critic_err_adv = 1 - binary_accuracy(
        z_adv, tf.zeros(tf.shape(z_adv), tf.bool), 0.0)

    # validation model_metrics
    err_df = 1 - slim.metrics.accuracy(test_model(X_v_hat_df)['pred'], y_v)
    err_df_all = 1 - slim.metrics.accuracy(
        test_model(X_v_hat_df_all)['pred'], outs_x_v['pred'])
    l2_v_hat = tf.sqrt(tf.reduce_sum((X_v - X_v_hat)**2, axis=reduce_ind))
    l2_v_rec = tf.sqrt(tf.reduce_sum((X_v - X_v_rec)**2, axis=reduce_ind))
    l1_v_rec = tf.reduce_sum(tf.abs(X_v - X_v_rec), axis=reduce_ind)
    l2_df = tf.sqrt(tf.reduce_sum((X_v - X_v_hat_df)**2, axis=reduce_ind))
    l2_df_norm = l2_df / tf.sqrt(tf.reduce_sum(X_v**2, axis=reduce_ind))
    l2_df_all = tf.sqrt(
        tf.reduce_sum((X_v - X_v_hat_df_all)**2, axis=reduce_ind))
    l2_df_norm_all = l2_df_all / tf.sqrt(
        tf.reduce_sum(X_v**2, axis=reduce_ind))
    tf.summary.histogram('l2_df', l2_df, collections=["adv_summaries"])
    tf.summary.histogram('l2_df_norm', l2_df_norm,
                         collections=["adv_summaries"])

    # model_metrics
    pretrain_model_metrics = OrderedDict([('nll', nll),
                                          ('weight_decay', weight_decay),
                                          ('err', err)])
    model_metrics = OrderedDict([('loss', loss),
                                 ('nll', nll),
                                 ('l2_hat', tf.reduce_mean(l2_hat)),
                                 ('gan', gan),
                                 ('rec_l1', rec_l1),
                                 ('rec_l2', rec_l2),
                                 ('weight_decay', weight_decay),
                                 ('err', err),
                                 ('conf', conf),
                                 ('err_hat', err_hat),
                                 ('err_hat_f', err_hat_f),
                                 ('conf_t', tf.reduce_mean(target_conf_v[0])),
                                 ('conf_hat', conf_hat),
                                 ('conf_hat_f', conf_hat_f),
                                 ('err_rec', err_rec),
                                 ('conf_rec', conf_rec)])
    critic_metrics = OrderedDict([('c_loss', critic_loss),
                                  ('c_gan', critic_gan),
                                  ('c_gan_data', critic_gan_data),
                                  ('c_gan_adv', critic_gan_adv),
                                  ('c_grad_norm', critic_grad_penalty),
                                  ('c_err_adv', critic_err_adv),
                                  ('c_err_data', critic_err_data)])
    val_metrics = OrderedDict([('nll', nll_v), ('err', err_v)])
    adv_metrics = OrderedDict([('l2_df', tf.reduce_mean(l2_df)),
                               ('l2_df_norm', tf.reduce_mean(l2_df_norm)),
                               ('l2_df_all', tf.reduce_mean(l2_df_all)),
                               ('l2_df_all_norm', tf.reduce_mean(l2_df_norm_all)),
                               ('l2_hat', tf.reduce_mean(l2_v_hat)),
                               ('conf_hat', conf_v_hat),
                               ('l1_rec', tf.reduce_mean(l1_v_rec)),
                               ('l2_rec', tf.reduce_mean(l2_v_rec)),
                               ('err_df', err_df),
                               ('err_df_all', err_df_all)])

    pretrain_metric_mean, pretrain_metric_upd = register_metrics(
        pretrain_model_metrics, collections="pretrain_model_summaries")
    metric_mean, metric_upd = register_metrics(
        model_metrics, collections="model_summaries")
    critic_metric_mean, critic_metric_upd = register_metrics(
        critic_metrics, collections="critic_summaries")
    val_metric_mean, val_metric_upd = register_metrics(
        val_metrics, prefix="val_", collections="val_summaries")
    adv_metric_mean, adv_metric_upd = register_metrics(
        adv_metrics, collections="adv_summaries")
    metrics_reset = tf.variables_initializer(tf.local_variables())

    # training ops
    lr = tf.Variable(FLAGS.lr, trainable=False)
    critic_lr = tf.Variable(FLAGS.critic_lr, trainable=False)
tf.Variable(FLAGS.critic_lr, trainable=False) tf.summary.scalar('lr', lr, collections=["model_summaries"]) tf.summary.scalar('critic_lr', critic_lr, collections=["critic_summaries"]) optimizer = tf.train.AdamOptimizer(learning_rate=lr, beta1=0.5) preinit_ops = tf.get_collection("PREINIT_OPS") with tf.control_dependencies(preinit_ops): pretrain_solver = optimizer.minimize(pretrain_loss, var_list=params) solver = optimizer.minimize(loss, var_list=params) critic_solver = (tf.train.AdamOptimizer( learning_rate=critic_lr, beta1=0.5).minimize(critic_loss, var_list=critic_params)) # train summary_images, summary_labels = select_balanced_subset( train_ds.images, train_ds.labels, num_classes, num_classes) summary_images = summary_images.transpose((0, 3, 1, 2)) save_path = os.path.join(FLAGS.samples_dir, 'orig.png') save_images(summary_images, save_path) if FLAGS.gpu_memory < 1.0: gpu_options = tf.GPUOptions( per_process_gpu_memory_fraction=FLAGS.gpu_memory) config = tf.ConfigProto(gpu_options=gpu_options) else: config = None with tf.Session(config=config) as sess: try: # summaries summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph) summaries = tf.summary.merge_all("model_summaries") critic_summaries = tf.summary.merge_all("critic_summaries") val_summaries = tf.summary.merge_all("val_summaries") adv_summaries = tf.summary.merge_all("adv_summaries") # initialization tf.local_variables_initializer().run() tf.global_variables_initializer().run() # pretrain model if FLAGS.pretrain_niter > 0: logging.info("Model pretraining") for epoch in range(1, FLAGS.pretrain_niter + 1): train_iterator = batch_iterator(train_ds.images, train_ds.labels, FLAGS.batch_size, shuffle=True) sess.run(metrics_reset) start_time = time.time() for ind, (images, labels) in enumerate(train_iterator): sess.run([pretrain_solver, pretrain_metric_upd], feed_dict={ X: images, y: labels }) str_bfr = six.StringIO() str_bfr.write("Pretrain epoch [{}, {:.2f}s]:".format( epoch, time.time() - start_time)) print_results_str(str_bfr, pretrain_model_metrics.keys(), sess.run(pretrain_metric_mean)) print_results_str(str_bfr, critic_metrics.keys(), sess.run(critic_metric_mean)) logging.info(str_bfr.getvalue()[:-1]) # training for epoch in range(1, FLAGS.niter + 1): train_iterator = batch_iterator(train_ds.images, train_ds.labels, FLAGS.batch_size, shuffle=True) sess.run(metrics_reset) start_time = time.time() for ind, (images, labels) in enumerate(train_iterator): batch_index = (epoch - 1) * (train_ds.images.shape[0] // FLAGS.batch_size) + ind # train critic for several steps X_hat_np = sess.run(X_hat, feed_dict={X: images}) for _ in range(FLAGS.critic_steps - 1): sess.run([critic_solver], feed_dict={ X: images, y: labels, X_hat_h: X_hat_np }) else: summary = sess.run([ critic_solver, critic_metric_upd, critic_summaries ], feed_dict={ X: images, y: labels, X_hat_h: X_hat_np })[-1] summary_writer.add_summary(summary, batch_index) # train model summary = sess.run([solver, metric_upd, summaries], feed_dict={ X: images, y: labels })[-1] summary_writer.add_summary(summary, batch_index) str_bfr = six.StringIO() str_bfr.write("Train epoch [{}, {:.2f}s]:".format( epoch, time.time() - start_time)) print_results_str(str_bfr, model_metrics.keys(), sess.run(metric_mean)) print_results_str(str_bfr, critic_metrics.keys(), sess.run(critic_metric_mean)) logging.info(str_bfr.getvalue()[:-1]) val_iterator = batch_iterator(val_ds.images, val_ds.labels, 100, shuffle=False) for images, labels in val_iterator: summary = sess.run([val_metric_upd, 
val_summaries], feed_dict={ X_v: images, y_v: labels })[-1] summary_writer.add_summary(summary, epoch) str_bfr = six.StringIO() str_bfr.write("Valid epoch [{}]:".format(epoch)) print_results_str(str_bfr, val_metrics.keys(), sess.run(val_metric_mean)) logging.info(str_bfr.getvalue()[:-1]) # learning rate decay update_lr = lr_decay(lr, epoch) if update_lr is not None: sess.run(update_lr) logging.debug( "learning rate was updated to: {:.10f}".format( lr.eval())) critic_update_lr = lr_decay(critic_lr, epoch, prefix='critic_') if critic_update_lr is not None: sess.run(critic_update_lr) logging.debug( "critic learning rate was updated to: {:.10f}".format( critic_lr.eval())) if epoch % FLAGS.summary_frequency == 0: samples_hat, samples_rec, samples_df, summary = sess.run( [ X_v_hat, X_v_rec, X_v_hat_df, adv_summaries, adv_metric_upd ], feed_dict={ X_v: summary_images, y_v: summary_labels })[:-1] summary_writer.add_summary(summary, epoch) save_path = os.path.join(FLAGS.samples_dir, 'epoch_orig-%d.png' % epoch) save_images(summary_images, save_path) save_path = os.path.join(FLAGS.samples_dir, 'epoch-%d.png' % epoch) save_images(samples_hat, save_path) save_path = os.path.join(FLAGS.samples_dir, 'epoch_rec-%d.png' % epoch) save_images(samples_rec, save_path) save_path = os.path.join(FLAGS.samples_dir, 'epoch_df-%d.png' % epoch) save_images(samples_df, save_path) str_bfr = six.StringIO() str_bfr.write("Summary epoch [{}]:".format(epoch)) print_results_str(str_bfr, adv_metrics.keys(), sess.run(adv_metric_mean)) logging.info(str_bfr.getvalue()[:-1]) if FLAGS.checkpoint_frequency != -1 and epoch % FLAGS.checkpoint_frequency == 0: save_checkpoint(sess, vars, epoch=epoch) save_checkpoint(sess, critic_vars, name="critic_model", epoch=epoch) except KeyboardInterrupt: logging.debug("Keyboard interrupt. Stopping training...") except NanError as e: logging.info(e) finally: sess.run(metrics_reset) save_checkpoint(sess, vars) save_checkpoint(sess, critic_vars, name="critic_model") # final accuracy test_iterator = batch_iterator(test_ds.images, test_ds.labels, 100, shuffle=False) for images, labels in test_iterator: sess.run([val_metric_upd], feed_dict={X_v: images, y_v: labels}) str_bfr = six.StringIO() str_bfr.write("Final epoch [{}]:".format(epoch)) for metric_name, metric_value in zip(val_metrics.keys(), sess.run(val_metric_mean)): str_bfr.write(" {}: {:.6f},".format(metric_name, metric_value)) logging.info(str_bfr.getvalue()[:-1])
def main(argv): del argv # unused if tf.io.gfile.exists(FLAGS.model_dir): tf.compat.v1.logging.warning( 'Warning: deleting old log directory at {}'.format(FLAGS.model_dir)) tf.io.gfile.rmtree(FLAGS.model_dir) tf.io.gfile.makedirs(FLAGS.model_dir) # Collapse the image data dimension for use with a fully-connected layer. image_size = np.prod(IMAGE_SHAPE, dtype=np.int32) if FLAGS.fake_data: train_images = build_fake_data([10, image_size]) else: mnist_data = mnist.read_data_sets(FLAGS.data_dir, reshape=image_size) train_images = mnist_data.train.images images = build_input_pipeline(train_images, FLAGS.batch_size) # Build a Generative network. We use the Flipout Monte Carlo estimator # for the fully-connected layers: this enables lower variance stochastic # gradients than naive reparameterization. with tf.compat.v1.name_scope('Generator'): random_noise = tf.placeholder(tf.float64, shape=[None, FLAGS.hidden_size]) generative_net = tf.keras.Sequential([ tfp.layers.DenseFlipout(FLAGS.hidden_size, activation=tf.nn.relu), tfp.layers.DenseFlipout(image_size, activation=tf.sigmoid) ]) synthetic_image = generative_net(random_noise) # Build a Discriminative network. Define the model as a Bernoulli # distribution parameterized by logits from a fully-connected layer. with tf.compat.v1.name_scope('Discriminator'): discriminative_net = tf.keras.Sequential([ tfp.layers.DenseFlipout(FLAGS.hidden_size, activation=tf.nn.relu), tfp.layers.DenseFlipout(1) ]) logits_real = discriminative_net(images) logits_fake = discriminative_net(synthetic_image) labels_distribution_real = tfd.Bernoulli(logits=logits_real) labels_distribution_fake = tfd.Bernoulli(logits=logits_fake) # Compute the model loss for the discriminator and generator, averaged over # the batch size. loss_real = -tf.reduce_mean( input_tensor=labels_distribution_real.log_prob( tf.ones_like(logits_real))) loss_fake = -tf.reduce_mean( input_tensor=labels_distribution_fake.log_prob( tf.zeros_like(logits_fake))) loss_discriminator = loss_real + loss_fake loss_generator = -tf.reduce_mean( input_tensor=labels_distribution_fake.log_prob( tf.ones_like(logits_fake))) with tf.compat.v1.name_scope('train'): optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate) train_op_discriminator = optimizer.minimize( loss_discriminator, var_list=tf.get_collection( tf.GraphKeys.TRAINABLE_VARIABLES, scope='Discriminator')) train_op_generator = optimizer.minimize( loss_generator, var_list=tf.get_collection( tf.GraphKeys.TRAINABLE_VARIABLES, scope='Generator')) with tf.compat.v1.Session() as sess: sess.run(tf.global_variables_initializer()) for step in range(FLAGS.max_steps + 1): # Iterate gradient updates on each network. _, loss_value_d = sess.run([train_op_discriminator, loss_discriminator], feed_dict={random_noise: build_fake_data( [FLAGS.batch_size, FLAGS.hidden_size])}) _, loss_value_g = sess.run([train_op_generator, loss_generator], feed_dict={random_noise: build_fake_data( [FLAGS.batch_size, FLAGS.hidden_size])}) # Visualize some synthetic images produced by the generative network. if step % FLAGS.viz_steps == 0: images = sess.run(synthetic_image, feed_dict={random_noise: build_fake_data( [16, FLAGS.hidden_size])}) plot_generated_images(images, fname=os.path.join( FLAGS.model_dir, 'step{:06d}_images.png'.format(step))) print('Step: {:>3d} Loss_discriminator: {:.3f} ' 'Loss_generator: {:.3f}'.format(step, loss_value_d, loss_value_g))
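# The script above calls a build_fake_data helper that is not defined in this
# excerpt. A minimal sketch, assuming it simply returns uniform noise of the
# requested shape (an assumption, not the script's actual definition):
import numpy as np

def build_fake_data(shape):
    # Uniform noise in [0, 1), the same value range as MNIST pixels; serves
    # both as generator input noise and as stand-in training images.
    return np.random.random(shape)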
import os import numpy as np import tensorflow as tf from tensorflow.contrib.learn.python.learn.datasets import mnist # directory where the MNIST dataset will be saved save_dir = 'exam/mnist' # download the MNIST data into save_dir data_sets = mnist.read_data_sets(save_dir, dtype=tf.uint8, reshape=False, validation_size=1000) data_splits = ['train', 'validation', 'test'] # match the field order of the Datasets namedtuple, since data_sets is indexed positionally below for i, split in enumerate(data_splits): print("saving %s" % split) data_set = data_sets[i] filename = os.path.join(save_dir, '%s.tfrecords' % split) writer = tf.python_io.TFRecordWriter(filename) for index in range(data_set.images.shape[0]): image = data_set.images[index].tostring() example = tf.train.Example(features=tf.train.Features( feature={ 'height': tf.train.Feature(int64_list=tf.train.Int64List( value=[data_set.images.shape[1]])), 'width': tf.train.Feature(int64_list=tf.train.Int64List( value=[data_set.images.shape[2]])),
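# The writer above is truncated mid-record in this excerpt. A minimal sketch of
# inspecting one of the resulting files, using only the 'height'/'width'
# features the visible code writes (the record path mirrors the writer above):
import tensorflow as tf

record_path = 'exam/mnist/train.tfrecords'
for serialized in tf.python_io.tf_record_iterator(record_path):
    example = tf.train.Example()
    example.ParseFromString(serialized)
    height = example.features.feature['height'].int64_list.value[0]
    width = example.features.feature['width'].int64_list.value[0]
    print('first record: %d x %d' % (height, width))
    break  # inspect the first record only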
def run(): # create the cluster configured by 'ps_hosts' and 'worker_hosts' cluster = tf.train.ClusterSpec(clusterSpec) # create a server for the local task server = tf.train.Server(cluster, job_name=jobName, task_index=taskIndex) if jobName == "ps": server.join() # ps hosts only join elif jobName == "worker": # workers perform the operations # ps_strategy = tf.contrib.training.GreedyLoadBalancingStrategy(FLAGS.num_ps) # Note: tf.train.replica_device_setter automatically places the parameters (Variables) # on the ps hosts (default placement strategy: round-robin over all ps hosts) and also # places a copy of the operations on each worker host with tf.device( tf.train.replica_device_setter( worker_device="/job:worker/task:%d" % (taskIndex), cluster=cluster)): # load mnist dataset mnist = read_data_sets("./dataset", one_hot=True) # the model images = tf.placeholder(tf.float32, [None, 784]) labels = tf.placeholder(tf.int32, [None, 10]) logits = model(images) loss = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels)) # The StopAtStepHook handles stopping after running given steps. hooks = [tf.train.StopAtStepHook(last_step=2000)] global_step = tf.train.get_or_create_global_step() optimizer = tf.train.AdamOptimizer(learning_rate=1e-04) if True: # synchronous training # wrap the optimizer with tf.train.SyncReplicasOptimizer # ref: https://www.tensorflow.org/api_docs/python/tf/train/SyncReplicasOptimizer optimizer = tf.train.SyncReplicasOptimizer( optimizer, replicas_to_aggregate=2, total_num_replicas=2) # create the hook which handles initialization and queues hooks.append(optimizer.make_session_run_hook((taskIndex == 0))) train_op = optimizer.minimize( loss, global_step=global_step, aggregation_method=tf.AggregationMethod.ADD_N) # The MonitoredTrainingSession takes care of session initialization, # restoring from a checkpoint, saving to a checkpoint, and closing when done # or an error occurs. with tf.train.MonitoredTrainingSession( master=server.target, is_chief=(taskIndex == 0), checkpoint_dir=checkpoint_dir, hooks=hooks) as mon_sess: while not mon_sess.should_stop(): # mon_sess.run handles AbortedError in case of preempted PS. img_batch, label_batch = mnist.train.next_batch(32) _, ls, step = mon_sess.run([train_op, loss, global_step], feed_dict={ images: img_batch, labels: label_batch }) if step % 100 == 0: print("Train step %d, loss: %f" % (step, ls))
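# run() references clusterSpec, jobName, taskIndex, and checkpoint_dir without
# defining them in this excerpt. A plausible minimal setup with placeholder
# addresses (real scripts typically parse these from command-line flags):
clusterSpec = {
    "ps": ["localhost:2222"],                       # parameter server hosts
    "worker": ["localhost:2223", "localhost:2224"]  # worker hosts
}
jobName = "worker"   # "ps" or "worker", set per process
taskIndex = 0        # index of this process within its job's host list
checkpoint_dir = "/tmp/mnist_dist_train"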
def main(args): input = read_data_sets(args.data_dir) data, label = input.train.next_batch(1) print('Label: %d' % label.tolist()[0]) with open(args.dest, 'wb') as f: pickle.dump(data, f)
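# Because pickle writes raw bytes, the file must be opened in binary mode
# ('wb' above), and the matching reader uses 'rb'. A minimal sketch of loading
# the dumped example back (the file name is illustrative):
import pickle

import numpy as np

with open('sample.pkl', 'rb') as f:  # 'rb' pairs with the 'wb' used above
    data = pickle.load(f)
image = np.asarray(data).reshape(28, 28)  # the (1, 784) batch back to 28x28
print(image.shape)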
threads = tf.train.start_queue_runners( coord=coord) # start all the queue runners collected in the graph for i in range(10000): image, label = sess.run([images_gen, labels_gen]) # fetch the next image data and label from the queue img = Image.fromarray(image, "L") # grayscale fname = "tfrecords_{0}-{1}.jpg".format(str(i), label) img.save(os.path.join(result_dir, fname)) # save the image finally: coord.request_stop() # request that all threads stop coord.join(threads) # wait for the threads to finish return # create the jpeg files. mnist = read_data_sets("MNIST_data", one_hot=True) # print(type(mnist.test.images)) # numpy.ndarray # print(mnist.test.images.shape) # (10000, 784) jpeg_dir = "mnist_jpeg_train2" numpy_ndarray_to_jpeg_files(mnist.train.images, (28, 28), jpeg_dir, mnist.train.labels) # jpeg_dir = "mnist_jpeg_test2" # numpy_ndarray_to_jpeg_files( mnist.test.images, (28,28), # jpeg_dir, mnist.test.labels ) # create the tfrecord. #fpath_list = glob.glob( os.path.join( jpeg_dir , "*.jpg") ) #tfrecord_fpath = "mnist_train.tfrecord" # fpath_list_to_tfrecord( fpath_list, tfrecord_fpath ) # read the tfrecord.
import tensorflow from tensorflow.contrib.learn.python.learn.datasets.mnist import read_data_sets # load the data mnist = read_data_sets('dataset/mnist', one_hot=True) # x is the placeholder for the training images x = tensorflow.placeholder(tensorflow.float32, [None, 784]) # y is the placeholder for the training image labels y = tensorflow.placeholder(tensorflow.float32, [None, 10]) # first convolutional layer # the sizes computed by tensorflow.nn.conv2d and tensorflow.nn.max_pool depend on strides # the size is computed as: ceil(size / strides_size) w_conv_1 = tensorflow.Variable(tensorflow.truncated_normal([5, 5, 1, 32], stddev=0.1)) b_conv_1 = tensorflow.Variable(tensorflow.constant(0.1, shape=[32])) # reshape each image from a 784-dim vector back to a 28*28 matrix x_reshape = tensorflow.reshape(x, [-1, 28, 28, 1]) # yields a 28*28 matrix features_conv_1 = tensorflow.nn.conv2d(x_reshape, w_conv_1, strides=[1, 1, 1, 1], padding='SAME') + b_conv_1 h_conv_1 = tensorflow.nn.relu(features_conv_1) # yields a 14*14 matrix h_pool_1 = tensorflow.nn.max_pool(h_conv_1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') # second convolutional layer w_conv_2 = tensorflow.Variable(tensorflow.truncated_normal([5, 5, 32, 64], stddev=0.1)) b_conv_2 = tensorflow.Variable(tensorflow.constant(0.1, shape=[64])) # yields a 14*14 matrix features_conv_2 = tensorflow.nn.conv2d(h_pool_1, w_conv_2, strides=[1, 1, 1, 1], padding='SAME') + b_conv_2 h_conv_2 = tensorflow.nn.relu(features_conv_2) # yields a 7*7 matrix h_pool_2 = tensorflow.nn.max_pool(h_conv_2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
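# A quick standalone check of the ceil(size / strides_size) rule quoted in the
# comments above: with 'SAME' padding, 28 -> 28 for the stride-1 convolutions
# and 28 -> 14 -> 7 for the two stride-2 poolings.
import math

for size, stride in [(28, 1), (28, 2), (14, 2)]:
    print('%d / %d -> %d' % (size, stride, math.ceil(size / stride)))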
def main(argv): del argv # unused FLAGS.encoder_layers = [int(units) for units in FLAGS.encoder_layers.split(",")] FLAGS.decoder_layers = [int(units) for units in FLAGS.decoder_layers.split(",")] FLAGS.activation = getattr(tf.nn, FLAGS.activation) if tf.gfile.Exists(FLAGS.model_dir): tf.logging.warn("Deleting old log directory at {}".format(FLAGS.model_dir)) tf.gfile.DeleteRecursively(FLAGS.model_dir) tf.gfile.MakeDirs(FLAGS.model_dir) if FLAGS.fake_data: mnist_data = build_fake_data() else: mnist_data = mnist.read_data_sets(FLAGS.data_dir) with tf.Graph().as_default(): (images, _, handle, training_iterator, heldout_iterator) = build_input_pipeline( mnist_data, FLAGS.batch_size, mnist_data.validation.num_examples) # Reshape as a pixel image and binarize pixels. images = tf.reshape(images, shape=[-1] + IMAGE_SHAPE) images = tf.cast(images > 0.5, dtype=tf.int32) encoder = Encoder(FLAGS.encoder_layers, FLAGS.activation, FLAGS.latent_size, FLAGS.code_size) decoder = Decoder(FLAGS.decoder_layers, FLAGS.activation, IMAGE_SHAPE) vector_quantizer = VectorQuantizer(FLAGS.num_codes, FLAGS.code_size) # Build the model and loss function. loss, _, decoder_distribution, _ = make_vq_vae( images, encoder, decoder, vector_quantizer, FLAGS.beta, FLAGS.decay) reconstructed_images = decoder_distribution.mean() # Decode samples from a uniform prior for visualization. prior = tfd.Multinomial(total_count=1., logits=tf.zeros(FLAGS.num_codes)) prior_samples = tf.reduce_sum( tf.expand_dims(prior.sample([10, FLAGS.latent_size]), -1) * tf.reshape(vector_quantizer.codebook, [1, 1, FLAGS.num_codes, FLAGS.code_size]), axis=2) decoded_distribution_given_random_prior = decoder(prior_samples) random_images = decoded_distribution_given_random_prior.mean() # Perform inference by minimizing the loss function. optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate) train_op = optimizer.minimize(loss) summary = tf.summary.merge_all() init = tf.global_variables_initializer() saver = tf.train.Saver() with tf.Session() as sess: summary_writer = tf.summary.FileWriter(FLAGS.model_dir, sess.graph) sess.run(init) # Run the training loop. train_handle = sess.run(training_iterator.string_handle()) heldout_handle = sess.run(heldout_iterator.string_handle()) for step in range(FLAGS.max_steps): start_time = time.time() _, loss_value = sess.run([train_op, loss], feed_dict={handle: train_handle}) duration = time.time() - start_time if step % 100 == 0: print("Step: {:>3d} Loss: {:.3f} ({:.3f} sec)".format( step, loss_value, duration)) # Update the events file. summary_str = sess.run(summary, feed_dict={handle: train_handle}) summary_writer.add_summary(summary_str, step) summary_writer.flush() # Periodically save a checkpoint and visualize model progress. if (step + 1) % FLAGS.viz_steps == 0 or (step + 1) == FLAGS.max_steps: checkpoint_file = os.path.join(FLAGS.model_dir, "model.ckpt") saver.save(sess, checkpoint_file, global_step=step) # Visualize inputs and model reconstructions from the training set. images_val, reconstructions_val, random_images_val = sess.run( (images, reconstructed_images, random_images), feed_dict={handle: train_handle}) visualize_training(images_val, reconstructions_val, random_images_val, log_dir=FLAGS.model_dir, prefix="step{:05d}_train".format(step)) # Visualize inputs and model reconstructions from the validation set. 
heldout_images_val, heldout_reconstructions_val = sess.run( (images, reconstructed_images), feed_dict={handle: heldout_handle}) visualize_training(heldout_images_val, heldout_reconstructions_val, None, log_dir=FLAGS.model_dir, prefix="step{:05d}_validation".format(step))
def train(): # Train a Variational Autoencoder on MNIST # Input placeholders with tf.name_scope('data'): x = tf.placeholder(tf.float32, [None, 28, 28, 1]) tf.summary.image('data', x) with tf.variable_scope('variational'): q_mu, q_sigma = inference_network(x=x, latent_dim=FLAGS.latent_dim, hidden_size=FLAGS.hidden_size) with st.value_type(st.SampleValue()): # The variational distribution is a Normal with mean and standard # deviation given by the inference network q_z = st.StochasticTensor( distributions.Normal(mu=q_mu, sigma=q_sigma)) with tf.variable_scope('model'): # The likelihood is Bernoulli-distributed with logits given by the # generative network p_x_given_z_logits = generative_network(z=q_z, hidden_size=FLAGS.hidden_size) p_x_given_z = distributions.Bernoulli(logits=p_x_given_z_logits) posterior_predictive_samples = p_x_given_z.sample() tf.summary.image('posterior_predictive', tf.cast(posterior_predictive_samples, tf.float32)) # Take samples from the prior with tf.variable_scope('model', reuse=True): p_z = distributions.Normal(mu=np.zeros(FLAGS.latent_dim, dtype=np.float32), sigma=np.ones(FLAGS.latent_dim, dtype=np.float32)) p_z_sample = p_z.sample_n(FLAGS.n_samples) p_x_given_z_logits = generative_network(z=p_z_sample, hidden_size=FLAGS.hidden_size) prior_predictive = distributions.Bernoulli(logits=p_x_given_z_logits) prior_predictive_samples = prior_predictive.sample() tf.summary.image('prior_predictive', tf.cast(prior_predictive_samples, tf.float32)) # Take samples from the prior with a placeholder with tf.variable_scope('model', reuse=True): z_input = tf.placeholder(tf.float32, [None, FLAGS.latent_dim]) p_x_given_z_logits = generative_network(z=z_input, hidden_size=FLAGS.hidden_size) prior_predictive_inp = distributions.Bernoulli( logits=p_x_given_z_logits) prior_predictive_inp_sample = prior_predictive_inp.sample() # Build the evidence lower bound (ELBO) or the negative loss # kl=distributions.kl(q_z.distribution, p_z) kl = tf.reduce_sum(distributions.kl(q_z.distribution, p_z), 1) expected_log_likelihood = tf.reduce_sum(p_x_given_z.log_pmf(x), [1, 2, 3]) elbo = tf.reduce_sum(expected_log_likelihood - kl, 0) optimizer = tf.train.RMSPropOptimizer(learning_rate=0.001) train_op = optimizer.minimize(-elbo) tf.summary.scalar("ELBO", elbo) # Merge all the summaries summary_op = tf.summary.merge_all() init_op = tf.global_variables_initializer() # Run training sess = tf.InteractiveSession() sess.run(init_op) mnist = read_data_sets(FLAGS.data_dir, one_hot=True) print('Saving TensorBoard summaries and images to: %s' % FLAGS.logdir) train_writer = tf.summary.FileWriter(FLAGS.logdir, sess.graph) # Get fixed MNIST digits for plotting posterior means during training np_x_fixed, np_y = mnist.test.next_batch(5000) np_x_fixed = np_x_fixed.reshape(5000, 28, 28, 1) np_x_fixed = (np_x_fixed > 0.5).astype(np.float32) for i in range(FLAGS.n_iterations): # Re-binarize the data at every batch; this improves results np_x, _ = mnist.train.next_batch(FLAGS.batch_size) np_x = np_x.reshape(FLAGS.batch_size, 28, 28, 1) np_x = (np_x > 0.5).astype(np.float32) sess.run(train_op, {x: np_x}) # Print progress and save samples every so often t0 = time.time() if i % FLAGS.print_every == 0: np_elbo, summary_str = sess.run([elbo, summary_op], {x: np_x}) train_writer.add_summary(summary_str, i) print('Iteration: {0:d} ELBO: {1:.3f} Examples/s: {2:.3e}'.format( i, np_elbo / FLAGS.batch_size, FLAGS.batch_size * FLAGS.print_every / (time.time() - t0))) t0 = time.time() # Save samples # np_posterior_samples,
np_prior_samples = sess.run( # [posterior_predictive_samples, prior_predictive_samples], {x: np_x}) # for k in range(FLAGS.n_samples): # f_name = os.path.join( # FLAGS.logdir, 'iter_%d_posterior_predictive_%d_data.jpg' % (i, k)) # imsave(f_name, np_x[k, :, :, 0]) # f_name = os.path.join( # FLAGS.logdir, 'iter_%d_posterior_predictive_%d_sample.jpg' % (i, k)) # imsave(f_name, np_posterior_samples[k, :, :, 0]) # f_name = os.path.join( # FLAGS.logdir, 'iter_%d_prior_predictive_%d.jpg' % (i, k)) # imsave(f_name, np_prior_samples[k, :, :, 0]) # Plot the posterior predictive space if i == ((FLAGS.n_iterations) - FLAGS.print_every): print("Run Y = tsne.tsne(X, no_dims, perplexity) to perform t-SNE on your dataset.") print("Running example on 2,500 MNIST digits...") X = sess.run(q_mu, {x: np_x_fixed}) print(X.shape) labels = np_y print(labels.shape) Y = tsne.tsne(X, 2, 50, 20.0) cmap = plt.get_cmap('gist_rainbow') plt.scatter(Y[:, 0], Y[:, 1], 20, c=np.argmax(labels, 1), cmap=cmap) plt.show()
def load_mnist(train_dir='MNIST-data'): return read_data_sets(train_dir)
# digits and create a simple CNN network to predict the # digit category (0-9) import matplotlib.pyplot as plt import numpy as np import tensorflow as tf from tensorflow.contrib.learn.python.learn.datasets.mnist import read_data_sets from tensorflow.python.framework import ops ops.reset_default_graph() # Start a graph session sess = tf.Session() # Load data data_dir = 'temp' mnist = read_data_sets(data_dir) # Convert images into 28x28 (they are downloaded as 1x784) train_xdata = np.array([np.reshape(x, (28, 28)) for x in mnist.train.images]) test_xdata = np.array([np.reshape(x, (28, 28)) for x in mnist.test.images]) # Keep the labels as integer class ids (read_data_sets was called without one_hot) train_labels = mnist.train.labels test_labels = mnist.test.labels # Set model parameters batch_size = 100 learning_rate = 0.005 evaluation_size = 500 image_width = train_xdata[0].shape[0] image_height = train_xdata[0].shape[1]
# you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Functions for downloading and reading MNIST data.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import gzip import os import tempfile import numpy from six.moves import urllib from six.moves import xrange # pylint: disable=redefined-builtin import tensorflow as tf from tensorflow.contrib.learn.python.learn.datasets.mnist import read_data_sets # import tensorflow.examples.tutorials.mnist.input_data # mnist = input_data.read_data_sets("MNIST_data/", one_hot=True) mnist = read_data_sets("MNIST_data/", one_hot=True)
ROLE = "" # TODO Input IAM Role BUCKET = "" # TODO Input Bucket Name OUTPUT_PATH = "s3://{}/hands-on/mnist-tensorflow".format(BUCKET) #ROLE = get_execution_role() ENDPOINT_NAME = "mnist-tensorflow-jhy" def show_image(arr): pixels = arr.reshape(28, 28) plt.imshow(pixels, cmap="gray") plt.show() if __name__ == "__main__": # download MNIST dataset data_set = mnist.read_data_sets('data', reshape=True) # upload local datasets to S3 bucket sagemaker_session = sagemaker.Session() inputs = sagemaker_session.upload_data( path='data', bucket=BUCKET, key_prefix='hands-on/mnist-tensorflow/data') # create model with custom tensorflow code mnist_estimator = TensorFlow(role=ROLE, entry_point='mnist.py', output_path=OUTPUT_PATH, code_location=OUTPUT_PATH, training_steps=1000, evaluation_steps=100,
# neural network with 5 layers # # · · · · · · · · · · (input data, flattened pixels) X [batch, 784] # 784 = 28*28 # \x/x\x/x\x/x\x/x\x/ -- fully connected layer (relu+BN) W1 [784, 200] B1[200] # · · · · · · · · · Y1 [batch, 200] # \x/x\x/x\x/x\x/ -- fully connected layer (relu+BN) W2 [200, 100] B2[100] # · · · · · · · Y2 [batch, 100] # \x/x\x/x\x/ -- fully connected layer (relu+BN) W3 [100, 60] B3[60] # · · · · · Y3 [batch, 60] # \x/x\x/ -- fully connected layer (relu+BN) W4 [60, 30] B4[30] # · · · Y4 [batch, 30] # \x/ -- fully connected layer (softmax) W5 [30, 10] B5[10] # · Y5 [batch, 10] # Download images and labels into mnist.test (10K images+labels) and mnist.train (60K images+labels) mnist = read_data_sets("data", one_hot=True, reshape=False, validation_size=0) # input X: 28x28 grayscale images, the first dimension (None) will index the images in the mini-batch X = tf.placeholder(tf.float32, [None, 28, 28, 1]) # correct answers will go here Y_ = tf.placeholder(tf.float32, [None, 10]) # variable learning rate lr = tf.placeholder(tf.float32) # train/test selector for batch normalisation tst = tf.placeholder(tf.bool) # training iteration iter = tf.placeholder(tf.int32) # five layers and their number of neurons (the last layer has 10 softmax neurons) L = 200 M = 100
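# The diagram above describes each hidden layer as fully connected with
# relu+BN. A minimal standalone sketch of one such stage using plain per-batch
# statistics; the full script instead threads the tst/iter placeholders
# through an exponential moving average, so this simplification is an
# assumption, not the script's own helper:
import tensorflow as tf

X = tf.placeholder(tf.float32, [None, 28, 28, 1])
W1 = tf.Variable(tf.truncated_normal([784, 200], stddev=0.1))
B1 = tf.Variable(tf.ones([200]) / 10)    # BN offset stands in for the bias

XX = tf.reshape(X, [-1, 784])            # flatten the 28x28x1 images
Y1l = tf.matmul(XX, W1)                  # linear part (no bias before BN)
mean1, var1 = tf.nn.moments(Y1l, [0])    # per-batch statistics
Y1bn = tf.nn.batch_normalization(Y1l, mean1, var1, B1, None, 1e-5)
Y1 = tf.nn.relu(Y1bn)                    # Y1 [batch, 200], as in the diagram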
def run_training(): """Train MNIST for a number of steps.""" data_sets = read_data_sets(INPUT_DATA_DIR) with tf.Session() as sess: images_placeholder = tf.placeholder(tf.float32, shape=(BATCH_SIZE, IMAGE_PIXELS)) labels_placeholder = tf.placeholder(tf.int32, shape=(BATCH_SIZE)) # Hidden 1. with tf.name_scope('hidden1'): weights = tf.Variable( tf.truncated_normal([IMAGE_PIXELS, HIDDEN1], stddev=1.0 / math.sqrt(IMAGE_PIXELS)), name='weights') biases = tf.Variable(tf.zeros([HIDDEN1]), name='biases') hidden1 = tf.nn.relu(tf.matmul(images_placeholder, weights) + biases) # Hidden 2 with tf.name_scope('hidden2'): weights = tf.Variable( tf.truncated_normal([HIDDEN1, HIDDEN2], stddev=1.0 / math.sqrt(HIDDEN1)), name='weights') biases = tf.Variable(tf.zeros([HIDDEN2]), name='biases') hidden2 = tf.nn.relu(tf.matmul(hidden1, weights) + biases) # Linear with tf.name_scope('softmax_linear'): weights = tf.Variable( tf.truncated_normal([HIDDEN2, NUM_CLASSES], stddev=1.0 / math.sqrt(HIDDEN2)), name='weights') biases = tf.Variable(tf.zeros([NUM_CLASSES]), name='biases') logits = tf.matmul(hidden2, weights) + biases # Add to the Graph the Ops for loss calculation. labels = tf.to_int64(labels_placeholder) cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( labels=labels, logits=logits, name='xentropy') loss = tf.reduce_mean(cross_entropy, name='xentropy_mean') optimizer = tf.train.GradientDescentOptimizer(LEARNING_RATE) global_step = tf.Variable(0, name='global_step', trainable=False) train_op = optimizer.minimize(loss, global_step=global_step) correct = tf.nn.in_top_k(logits, labels, 1) eval_correct = tf.reduce_sum(tf.cast(correct, tf.int32)) init = tf.global_variables_initializer() sess.run(init) for step in range(ITERATIONS): start_time = time.time() feed_dict = get_next_batch(data_sets.train, images_placeholder, labels_placeholder) _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict) duration = time.time() - start_time if step % 100 == 0: print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration)) if (step + 1) % 1000 == 0 or (step + 1) == ITERATIONS: print('Training Data Eval:') do_eval(sess, eval_correct, images_placeholder, labels_placeholder, data_sets.train) print('Validation Data Eval:') do_eval(sess, eval_correct, images_placeholder, labels_placeholder, data_sets.validation) print('Test Data Eval:') do_eval(sess, eval_correct, images_placeholder, labels_placeholder, data_sets.test)
def run_training(): with tf.Graph().as_default(): # train on the data and run validation after each epoch, so nb_epochs=1 images, labels = inputs(train=True, batch_size=cfg.FLAGS.batch_size, nb_epochs=cfg.FLAGS.nb_epochs) logits = mnist.inference(images, cfg.FLAGS.hidden1, cfg.FLAGS.hidden2) loss = mnist.loss(logits, labels) train_op = mnist.training(loss, cfg.FLAGS.learning_rate) init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) sess = tf.Session() sess.run(init_op) coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(sess=sess, coord=coord) data_sets = mnist_datasets.read_data_sets(cfg.FLAGS.train_dir, dtype=tf.uint8, reshape=False, validation_size=cfg.FLAGS.validation_size) nb_train_samples = data_sets.train.num_examples # print('training samples: {}; batch_size: {}'.format(nb_train_samples, cfg.FLAGS.batch_size)) # .. 55000 and 100 # prepare validation data in terms of tf.constant image_valid_np = data_sets.validation.images.reshape((cfg.FLAGS.validation_size, mnist.IMAGE_PIXELS)) label_valid_np = data_sets.validation.labels # shape (5000,) # to fit the batch size idx_valid = np.random.choice(cfg.FLAGS.validation_size, cfg.FLAGS.batch_size, replace=False) image_valid_np = image_valid_np[idx_valid, :] image_valid_np = image_valid_np * (1. / 255) - 0.5 # remember to apply the same preprocessing as in training label_valid_np = label_valid_np[idx_valid] step = 0 epoch_idx = 0 try: start_time = time.time() while not coord.should_stop(): _, loss_value = sess.run([train_op, loss]) step += 1 if step >= nb_train_samples // cfg.FLAGS.batch_size: epoch_idx += 1 end_time = time.time() duration = end_time - start_time print('Training Epoch {}, Step {}: loss = {:.02f} ({:.03f} sec)' .format(epoch_idx, step, loss_value, duration)) start_time = end_time # re-timing step = 0 # reset step counter # derive loss on validation dataset loss_valid_value = sess.run(loss, feed_dict={images: image_valid_np, labels: label_valid_np}) print('Validation Epoch {}: loss = {:.02f}' .format(epoch_idx, loss_valid_value)) except tf.errors.OutOfRangeError: print('Done training for epoch {}, {} steps'.format(epoch_idx, step)) finally: coord.request_stop() # # restart runner for validation data # coord = tf.train.Coordinator() # threads = tf.train.start_queue_runners(sess=sess, coord=coord) # # step = 0 # try: # start_time = time.time() # while not coord.should_stop(): # loss_value_valid = sess.run(loss_valid) # step += 1 # except tf.errors.OutOfRangeError: # print('Done validation for epoch {}, {} steps'.format(epoch_idx, step)) # finally: # coord.request_stop() # duration = time.time() - start_time # print('Validation: Epoch {}, Step {}: loss = {:.02f} ({:.03f} sec)' # .format(epoch_idx, step, loss_value_valid, duration)) coord.join(threads) sess.close()
def main(_): """Build the full graph for feeding inputs, training, and saving checkpoints. Run the training. Then, load the saved graph and run some predictions.""" # Get input data: get the sets of images and labels for training, # validation, and test on MNIST. data_sets = read_data_sets(FLAGS.data_dir, False) mnist_graph = tf.Graph() with mnist_graph.as_default(): # Generate placeholders for the images and labels. images_placeholder = tf.placeholder(tf.float32) labels_placeholder = tf.placeholder(tf.int32) tf.add_to_collection("images", images_placeholder) # Remember this Op. tf.add_to_collection("labels", labels_placeholder) # Remember this Op. # Build a Graph that computes predictions from the inference model. logits = mnist_inference(images_placeholder, HIDDEN1_UNITS, HIDDEN2_UNITS) tf.add_to_collection("logits", logits) # Remember this Op. # Add to the Graph the Ops that calculate and apply gradients. train_op, loss = mnist_training( logits, labels_placeholder, 0.01) # prediction accuracy _, indices_op = tf.nn.top_k(logits) flattened = tf.reshape(indices_op, [-1]) correct_prediction = tf.cast( tf.equal(labels_placeholder, flattened), tf.float32) accuracy = tf.reduce_mean(correct_prediction) # Define info to be used by the SummaryWriter. This will let # TensorBoard plot values during the training process. loss_summary = tf.scalar_summary("loss", loss) acc_summary = tf.scalar_summary("accuracy", accuracy) train_summary_op = tf.merge_summary([loss_summary, acc_summary]) # Add the variable initializer Op. init = tf.initialize_all_variables() # Create a saver for writing training checkpoints. saver = tf.train.Saver() # Create a summary writer. print("Writing Summaries to %s" % FLAGS.model_dir) train_summary_writer = tf.train.SummaryWriter(FLAGS.model_dir) # Uncomment the following line to see what we have constructed. # tf.train.write_graph(tf.get_default_graph().as_graph_def(), # "/tmp", "complete.pbtxt", as_text=True) # Run training for MAX_STEPS and save checkpoint at the end. with tf.Session(graph=mnist_graph) as sess: # Run the Op to initialize the variables. sess.run(init) # Start the training loop. for step in xrange(FLAGS.num_steps): # Read a batch of images and labels. images_feed, labels_feed = data_sets.train.next_batch(BATCH_SIZE) # Run one step of the model. The return values are the activations # from the `train_op` (which is discarded) and the `loss` Op. To # inspect the values of your Ops or variables, you may include them # in the list passed to sess.run() and the value tensors will be # returned in the tuple from the call. _, loss_value, tsummary, acc = sess.run( [train_op, loss, train_summary_op, accuracy], feed_dict={images_placeholder: images_feed, labels_placeholder: labels_feed}) if step % 100 == 0: # Write summary info train_summary_writer.add_summary(tsummary, step) if step % 1000 == 0: # Print loss/accuracy info print('----Step %d: loss = %.4f' % (step, loss_value)) print("accuracy: %s" % acc) print("\nWriting checkpoint file.") checkpoint_file = os.path.join(FLAGS.model_dir, 'checkpoint') saver.save(sess, checkpoint_file, global_step=step) _, loss_value = sess.run( [train_op, loss], feed_dict={images_placeholder: data_sets.test.images, labels_placeholder: data_sets.test.labels}) print("Test set loss: %s" % loss_value) # Run evaluation based on the saved checkpoint. 
with tf.Session(graph=tf.Graph()) as sess: checkpoint_file = tf.train.latest_checkpoint(FLAGS.model_dir) print("\nRunning evaluation based on saved checkpoint.") print("checkpoint file: {}".format(checkpoint_file)) # Load the saved meta graph and restore variables saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file)) saver.restore(sess, checkpoint_file) # Retrieve the Ops we 'remembered'. logits = tf.get_collection("logits")[0] images_placeholder = tf.get_collection("images")[0] labels_placeholder = tf.get_collection("labels")[0] # Add an Op that chooses the top k predictions. eval_op = tf.nn.top_k(logits) # Run evaluation. images_feed, labels_feed = data_sets.validation.next_batch( EVAL_BATCH_SIZE) prediction = sess.run(eval_op, feed_dict={images_placeholder: images_feed, labels_placeholder: labels_feed}) for i in range(len(labels_feed)): print("Ground truth: %d\nPrediction: %d" % (labels_feed[i], prediction.indices[i][0]))
def main(): args = get_argument() # initialize tf session sess = tf.Session() # load MNIST data mnist = read_data_sets(args.data_dir, reshape=False, validation_size=0) print("Input MNIST image shape: " + str(mnist.train.images.shape)) # resize MNIST images to 32x32 train_images = [resize(img,(32,32)) for img in mnist.train.images] train_images = train_images[40000:50000] print(len(train_images)) train_images = np.expand_dims(train_images, axis=3) print("Resized MNIST images: " + str(train_images.shape)) # extract saak anchors if args.restore_model_from is None: anchors = get_saak_anchors(train_images, sess) np.save(args.model_dir, {'anchors': anchors}) else: print("\nRestore from existing model:") data = np.load(args.restore_model_from).item() anchors = data['anchors'] print("Restoration succeeded!\n") # build up saak model print("Build up Saak model") model = SaakModel() model.load(anchors) # prepare testing images print("Prepare testing images") input_data = tf.placeholder(tf.float32) test_images = [resize(img,(32,32)) for img in mnist.test.images] test_images = np.expand_dims(test_images, axis=3) # compute saak coefficients for testing images if args.restore_coef_from is None: print("Compute saak coefficients") out = model.inference(input_data, layer=0) test_coef = sess.run(out, feed_dict={input_data: test_images}) train_coef = sess.run(out, feed_dict={input_data: train_images}) # save saak coefficients print("Save saak coefficients") np.save(args.saak_coef_dir, {'train': train_coef, 'test': test_coef}) else: print("Restore saak coefficients from existing file") data = np.load(args.restore_coef_from).item() train_coef = data['train'] test_coef = data['test'] # fit svm classifier train_coef = np.reshape(train_coef, [train_coef.shape[0], -1]) #test_coef = np.reshape(test_coef, [test_coef.shape[0], -1]) print("Saak feature dimension: " + str(train_coef.shape[1])) # write to csv myFile = open('csv_train40000-50000.csv','w') with myFile: writer = csv.writer(myFile) writer.writerows(train_coef) test_coef = np.reshape(test_coef, [test_coef.shape[0], -1]) print("\nDo classification using SVM") accuracy = classify_svm(train_coef, mnist.train.labels[40000:50000], test_coef, mnist.test.labels) # slice the labels to match the 40000:50000 image subset used above print("Accuracy: %.3f" % accuracy)
def train(): # Train a Variational Autoencoder on MNIST # Input placeholders with tf.name_scope('data'): x = tf.placeholder(tf.float32, [None, 28, 28, 1]) tf.summary.image('data', x) with tf.variable_scope('variational'): q_mu, q_sigma = inference_network(x=x, latent_dim=FLAGS.latent_dim, hidden_size=FLAGS.hidden_size) # The variational distribution is a Normal with mean and standard # deviation given by the inference network q_z = distributions.Normal(loc=q_mu, scale=q_sigma) assert q_z.reparameterization_type == distributions.FULLY_REPARAMETERIZED with tf.variable_scope('model'): # The likelihood is Bernoulli-distributed with logits given by the # generative network p_x_given_z_logits = generative_network(z=q_z.sample(), hidden_size=FLAGS.hidden_size) p_x_given_z = distributions.Bernoulli(logits=p_x_given_z_logits) posterior_predictive_samples = p_x_given_z.sample() tf.summary.image('posterior_predictive', tf.cast(posterior_predictive_samples, tf.float32)) # Take samples from the prior with tf.variable_scope('model', reuse=True): p_z = distributions.Normal(loc=np.zeros(FLAGS.latent_dim, dtype=np.float32), scale=np.ones(FLAGS.latent_dim, dtype=np.float32)) p_z_sample = p_z.sample(FLAGS.n_samples) p_x_given_z_logits = generative_network(z=p_z_sample, hidden_size=FLAGS.hidden_size) prior_predictive = distributions.Bernoulli(logits=p_x_given_z_logits) prior_predictive_samples = prior_predictive.sample() tf.summary.image('prior_predictive', tf.cast(prior_predictive_samples, tf.float32)) # Take samples from the prior with a placeholder with tf.variable_scope('model', reuse=True): z_input = tf.placeholder(tf.float32, [None, FLAGS.latent_dim]) p_x_given_z_logits = generative_network(z=z_input, hidden_size=FLAGS.hidden_size) prior_predictive_inp = distributions.Bernoulli(logits=p_x_given_z_logits) prior_predictive_inp_sample = prior_predictive_inp.sample() # Build the evidence lower bound (ELBO) or the negative loss kl = tf.reduce_sum(distributions.kl_divergence(q_z, p_z), 1) expected_log_likelihood = tf.reduce_sum(p_x_given_z.log_prob(x), [1, 2, 3]) elbo = tf.reduce_sum(expected_log_likelihood - kl, 0) optimizer = tf.train.RMSPropOptimizer(learning_rate=0.001) train_op = optimizer.minimize(-elbo) # Merge all the summaries summary_op = tf.summary.merge_all() init_op = tf.global_variables_initializer() # Run training sess = tf.InteractiveSession() sess.run(init_op) mnist = read_data_sets(FLAGS.data_dir, one_hot=True) print('Saving TensorBoard summaries and images to: %s' % FLAGS.logdir) train_writer = tf.summary.FileWriter(FLAGS.logdir, sess.graph) # Get fixed MNIST digits for plotting posterior means during training np_x_fixed, np_y = mnist.test.next_batch(5000) np_x_fixed = np_x_fixed.reshape(5000, 28, 28, 1) np_x_fixed = (np_x_fixed > 0.5).astype(np.float32) for i in range(FLAGS.n_iterations): # Re-binarize the data at every batch; this improves results np_x, _ = mnist.train.next_batch(FLAGS.batch_size) np_x = np_x.reshape(FLAGS.batch_size, 28, 28, 1) np_x = (np_x > 0.5).astype(np.float32) sess.run(train_op, {x: np_x}) # Print progress and save samples every so often t0 = time.time() if i % FLAGS.print_every == 0: np_elbo, summary_str = sess.run([elbo, summary_op], {x: np_x}) train_writer.add_summary(summary_str, i) print('Iteration: {0:d} ELBO: {1:.3f} Examples/s: {2:.3e}'.format( i, np_elbo / FLAGS.batch_size, FLAGS.batch_size * FLAGS.print_every / (time.time() - t0))) t0 = time.time() # Save samples np_posterior_samples, np_prior_samples = sess.run( [posterior_predictive_samples, 
prior_predictive_samples], {x: np_x}) for k in range(FLAGS.n_samples): f_name = os.path.join( FLAGS.logdir, 'iter_%d_posterior_predictive_%d_data.jpg' % (i, k)) imsave(f_name, np_x[k, :, :, 0]) f_name = os.path.join( FLAGS.logdir, 'iter_%d_posterior_predictive_%d_sample.jpg' % (i, k)) imsave(f_name, np_posterior_samples[k, :, :, 0]) f_name = os.path.join( FLAGS.logdir, 'iter_%d_prior_predictive_%d.jpg' % (i, k)) imsave(f_name, np_prior_samples[k, :, :, 0]) # Plot the posterior predictive space if FLAGS.latent_dim == 2: np_q_mu = sess.run(q_mu, {x: np_x_fixed}) cmap = mpl.colors.ListedColormap(sns.color_palette("husl")) f, ax = plt.subplots(1, figsize=(6 * 1.1618, 6)) im = ax.scatter(np_q_mu[:, 0], np_q_mu[:, 1], c=np.argmax(np_y, 1), cmap=cmap, alpha=0.7) ax.set_xlabel('First dimension of sampled latent variable $z_1$') ax.set_ylabel('Second dimension of sampled latent variable mean $z_2$') ax.set_xlim([-10., 10.]) ax.set_ylim([-10., 10.]) f.colorbar(im, ax=ax, label='Digit class') plt.tight_layout() plt.savefig(os.path.join(FLAGS.logdir, 'posterior_predictive_map_frame_%d.png' % i)) plt.close() nx = ny = 20 x_values = np.linspace(-3, 3, nx) y_values = np.linspace(-3, 3, ny) canvas = np.empty((28 * ny, 28 * nx)) for ii, yi in enumerate(x_values): for j, xi in enumerate(y_values): np_z = np.array([[xi, yi]]) x_mean = sess.run(prior_predictive_inp_sample, {z_input: np_z}) canvas[(nx - ii - 1) * 28:(nx - ii) * 28, j * 28:(j + 1) * 28] = x_mean[0].reshape(28, 28) imsave(os.path.join(FLAGS.logdir, 'prior_predictive_map_frame_%d.png' % i), canvas) # plt.figure(figsize=(8, 10)) # Xi, Yi = np.meshgrid(x_values, y_values) # plt.imshow(canvas, origin="upper") # plt.tight_layout() # plt.savefig() # Make the gifs if FLAGS.latent_dim == 2: os.system( 'convert -delay 15 -loop 0 {0}/posterior_predictive_map_frame*png {0}/posterior_predictive.gif' .format(FLAGS.logdir)) os.system( 'convert -delay 15 -loop 0 {0}/prior_predictive_map_frame*png {0}/prior_predictive.gif' .format(FLAGS.logdir))
def main(unused_argv): ps_hosts = FLAGS.ps_hosts.split(",") worker_hosts = FLAGS.worker_hosts.split(",") cluster = tf.train.ClusterSpec({"ps": ps_hosts, "worker": worker_hosts}) server = tf.train.Server(cluster, job_name=FLAGS.job_name, task_index=FLAGS.worker_index) if FLAGS.job_name == "ps": server.join() sys.exit(0) mnist = read_data_sets(FLAGS.data_dir, one_hot=True) if FLAGS.download_only: sys.exit(0) num_workers = len(worker_hosts) worker_grpc_url = 'grpc://' + worker_hosts[0] print("Worker GRPC URL: %s" % worker_grpc_url) print("Worker index = %d" % FLAGS.worker_index) print("Number of workers = %d" % num_workers) is_chief = (FLAGS.worker_index == 0) if FLAGS.sync_replicas: if FLAGS.replicas_to_aggregate is None: replicas_to_aggregate = num_workers else: replicas_to_aggregate = FLAGS.replicas_to_aggregate # Construct device setter object device_setter = tf.train.replica_device_setter(cluster=cluster) # The device setter will automatically place Variables ops on separate # parameter servers (ps). The non-Variable ops will be placed on the workers. with tf.device(device_setter): global_step = tf.Variable(0, name="global_step", trainable=False) # Variables of the hidden layer hid_w = tf.Variable(tf.truncated_normal( [IMAGE_PIXELS * IMAGE_PIXELS, FLAGS.hidden_units], stddev=1.0 / IMAGE_PIXELS), name="hid_w") hid_b = tf.Variable(tf.zeros([FLAGS.hidden_units]), name="hid_b") # Variables of the softmax layer sm_w = tf.Variable(tf.truncated_normal([FLAGS.hidden_units, 10], stddev=1.0 / math.sqrt(FLAGS.hidden_units)), name="sm_w") sm_b = tf.Variable(tf.zeros([10]), name="sm_b") # Ops: located on the worker specified with FLAGS.worker_index x = tf.placeholder(tf.float32, [None, IMAGE_PIXELS * IMAGE_PIXELS]) y_ = tf.placeholder(tf.float32, [None, 10]) hid_lin = tf.nn.xw_plus_b(x, hid_w, hid_b) hid = tf.nn.relu(hid_lin) y = tf.nn.softmax(tf.nn.xw_plus_b(hid, sm_w, sm_b)) cross_entropy = -tf.reduce_sum( y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0))) opt = tf.train.AdamOptimizer(FLAGS.learning_rate) if FLAGS.sync_replicas: opt = tf.train.SyncReplicasOptimizer( opt, replicas_to_aggregate=replicas_to_aggregate, total_num_replicas=num_workers, replica_id=FLAGS.worker_index, name="mnist_sync_replicas") train_step = opt.minimize(cross_entropy, global_step=global_step) if FLAGS.sync_replicas and is_chief: # Initial token and chief queue runners required by the sync_replicas mode chief_queue_runner = opt.get_chief_queue_runner() init_tokens_op = opt.get_init_tokens_op() init_op = tf.initialize_all_variables() train_dir = tempfile.mkdtemp() sv = tf.train.Supervisor(is_chief=is_chief, logdir=train_dir, init_op=init_op, recovery_wait_secs=1, global_step=global_step) sess_config = tf.ConfigProto(device_filters=[ "/job:ps", "/job:worker/task:%d" % FLAGS.worker_index ]) # The chief worker (worker_index==0) session will prepare the session, # while the remaining workers will wait for the preparation to complete. if is_chief: print("Worker %d: Initializing session..." % FLAGS.worker_index) else: print("Worker %d: Waiting for session to be initialized..." % FLAGS.worker_index) with sv.prepare_or_wait_for_session(worker_grpc_url, config=sess_config) as sess: print("Worker %d: Session initialization complete." 
% FLAGS.worker_index) if FLAGS.sync_replicas and is_chief: # Chief worker will start the chief queue runner and call the init op print("Starting chief queue runner and running init_tokens_op") sv.start_queue_runners(sess, [chief_queue_runner]) sess.run(init_tokens_op) # Perform training time_begin = time.time() print("Training begins @ %f" % time_begin) local_step = 0 step = 0 while not sv.should_stop() and step < FLAGS.train_steps: # Training feed batch_xs, batch_ys = mnist.train.next_batch(FLAGS.batch_size) train_feed = {x: batch_xs, y_: batch_ys} _, step = sess.run([train_step, global_step], feed_dict=train_feed) local_step += 1 now = time.time() if is_chief: print( "%f: Worker %d: training step %d done (global step: %d)" % (now, FLAGS.worker_index, local_step, step)) sv.stop() if is_chief: time_end = time.time() print("Training ends @ %f" % time_end) training_time = time_end - time_begin print("Training elapsed time: %f s" % training_time) # Validation feed val_feed = { x: mnist.validation.images, y_: mnist.validation.labels } val_xent = sess.run(cross_entropy, feed_dict=val_feed) print( "After %d training step(s), validation cross entropy = %g" % (FLAGS.train_steps, val_xent))
import numpy as np import matplotlib.pyplot as plt from tensorflow.contrib.learn.python.learn.datasets import mnist as input_data mnist = input_data.read_data_sets("MNIST_data", one_hot=True) images, labels = mnist.train.next_batch(10) print(images[0]) print(labels[0]) fig = plt.figure(figsize=(8, 4)) for c, (image, label) in enumerate(zip(images, labels)): subplot = fig.add_subplot(2, 5, c + 1) subplot.set_xticks([]) subplot.set_yticks([]) subplot.set_title('%d' % np.argmax(label)) subplot.imshow(image.reshape((28, 28)), vmin=0, vmax=1, cmap=plt.cm.gray_r, interpolation="nearest")
def main(argv): del argv # unused if tf.gfile.Exists(FLAGS.model_dir): tf.logging.warning( "Warning: deleting old log directory at {}".format(FLAGS.model_dir)) tf.gfile.DeleteRecursively(FLAGS.model_dir) tf.gfile.MakeDirs(FLAGS.model_dir) if FLAGS.fake_data: mnist_data = build_fake_data() else: mnist_data = mnist.read_data_sets(FLAGS.data_dir, reshape=False) (images, labels, handle, training_iterator, heldout_iterator) = build_input_pipeline( mnist_data, FLAGS.batch_size, mnist_data.validation.num_examples) # Build a Bayesian LeNet5 network. We use the Flipout Monte Carlo estimator # for the convolution and fully-connected layers: this enables lower # variance stochastic gradients than naive reparameterization. with tf.name_scope("bayesian_neural_net", values=[images]): neural_net = tf.keras.Sequential([ tfp.layers.Convolution2DFlipout(6, kernel_size=5, padding="SAME", activation=tf.nn.relu), tf.keras.layers.MaxPooling2D(pool_size=[2, 2], strides=[2, 2], padding="SAME"), tfp.layers.Convolution2DFlipout(16, kernel_size=5, padding="SAME", activation=tf.nn.relu), tf.keras.layers.MaxPooling2D(pool_size=[2, 2], strides=[2, 2], padding="SAME"), tfp.layers.Convolution2DFlipout(120, kernel_size=5, padding="SAME", activation=tf.nn.relu), tf.keras.layers.Flatten(), tfp.layers.DenseFlipout(84, activation=tf.nn.relu), tfp.layers.DenseFlipout(10) ]) logits = neural_net(images) labels_distribution = tfd.Categorical(logits=logits) # Compute the -ELBO as the loss, averaged over the batch size. neg_log_likelihood = -tf.reduce_mean(labels_distribution.log_prob(labels)) kl = sum(neural_net.losses) / mnist_data.train.num_examples elbo_loss = neg_log_likelihood + kl # Build metrics for evaluation. Predictions are formed from a single forward # pass of the probabilistic layers. They are cheap but noisy predictions. predictions = tf.argmax(logits, axis=1) accuracy, accuracy_update_op = tf.metrics.accuracy( labels=labels, predictions=predictions) # Extract weight posterior statistics for layers with weight distributions # for later visualization. names = [] qmeans = [] qstds = [] for i, layer in enumerate(neural_net.layers): try: q = layer.kernel_posterior except AttributeError: continue names.append("Layer {}".format(i)) qmeans.append(q.mean()) qstds.append(q.stddev()) with tf.name_scope("train"): optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate) train_op = optimizer.minimize(elbo_loss) init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) with tf.Session() as sess: sess.run(init_op) # Run the training loop. train_handle = sess.run(training_iterator.string_handle()) heldout_handle = sess.run(heldout_iterator.string_handle()) for step in range(FLAGS.max_steps): _ = sess.run([train_op, accuracy_update_op], feed_dict={handle: train_handle}) if step % 100 == 0: loss_value, accuracy_value = sess.run( [elbo_loss, accuracy], feed_dict={handle: train_handle}) print("Step: {:>3d} Loss: {:.3f} Accuracy: {:.3f}".format( step, loss_value, accuracy_value)) if (step+1) % FLAGS.viz_steps == 0: # Compute log prob of heldout set by averaging draws from the model: # p(heldout | train) = int_model p(heldout|model) p(model|train) # ~= 1/n * sum_{i=1}^n p(heldout | model_i) # where model_i is a draw from the posterior p(model|train). 
        probs = np.asarray([sess.run((labels_distribution.probs),
                                     feed_dict={handle: heldout_handle})
                            for _ in range(FLAGS.num_monte_carlo)])
        mean_probs = np.mean(probs, axis=0)

        image_vals, label_vals = sess.run((images, labels),
                                          feed_dict={handle: heldout_handle})
        heldout_lp = np.mean(np.log(mean_probs[np.arange(mean_probs.shape[0]),
                                               label_vals.flatten()]))
        print(" ... Held-out nats: {:.3f}".format(heldout_lp))

        qm_vals, qs_vals = sess.run((qmeans, qstds))

        if HAS_SEABORN:
          plot_weight_posteriors(names, qm_vals, qs_vals,
                                 fname=os.path.join(
                                     FLAGS.model_dir,
                                     "step{:05d}_weights.png".format(step)))
          plot_heldout_prediction(image_vals, probs,
                                  fname=os.path.join(
                                      FLAGS.model_dir,
                                      "step{:05d}_pred.png".format(step)),
                                  title="mean heldout logprob {:.2f}"
                                  .format(heldout_lp))
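To make that averaging concrete, here is a self-contained numpy sketch of the same held-out log-probability estimate, with random Dirichlet vectors standing in for the network's per-draw class probabilities (everything below is fabricated illustration, not model output):

import numpy as np

rng = np.random.RandomState(0)
num_draws, num_examples, num_classes = 8, 5, 10

# Shape [num_draws, num_examples, num_classes]: one probability vector
# per posterior draw and example.
probs = rng.dirichlet(np.ones(num_classes), size=(num_draws, num_examples))
labels = rng.randint(num_classes, size=num_examples)

# Average over draws first, then take the log-probability of the true class:
# log( 1/n * sum_i p(y | x, model_i) ).
mean_probs = probs.mean(axis=0)
heldout_lp = np.log(mean_probs[np.arange(num_examples), labels]).mean()
print("held-out nats: {:.3f}".format(heldout_lp))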
import os

import numpy as np
import tensorflow as tf
from tensorflow.contrib.learn.python.learn.datasets import mnist

# Assumed example values (not defined in the original fragment) so it runs
# stand-alone.
batch_size = 100
num_classes = 10
batch_shape = (batch_size, 28, 28, 1)
capacity = 10000
min_after_dequeue = 3000

# If `enqueue_many` is `False`, `tensors` is assumed to represent a
# single example. An input tensor with shape `[x, y, z]` will be output
# as a tensor with shape `[batch_size, x, y, z]`.
#
# If `enqueue_many` is `True`, `tensors` is assumed to represent a
# batch of examples, where the first dimension is indexed by example,
# and all members of `tensors` should have the same size in the
# first dimension. If an input tensor has shape `[*, x, y, z]`, the
# output will have shape `[batch_size, x, y, z]`.
enqueue_many = True

cache_dir = os.path.expanduser(
    os.path.join('~', '.keras', 'datasets', 'MNIST-data'))
data = mnist.read_data_sets(cache_dir, validation_size=0)

x_train_batch, y_train_batch = tf.train.shuffle_batch(
    tensors=[data.train.images, data.train.labels.astype(np.int32)],
    batch_size=batch_size,
    capacity=capacity,
    min_after_dequeue=min_after_dequeue,
    enqueue_many=enqueue_many,
    num_threads=8)

x_train_batch = tf.cast(x_train_batch, tf.float32)
x_train_batch = tf.reshape(x_train_batch, shape=batch_shape)

y_train_batch = tf.cast(y_train_batch, tf.int32)
y_train_batch = tf.one_hot(y_train_batch, num_classes)
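tf.train.shuffle_batch and its queue runners were later superseded by tf.data. A rough tf.data equivalent of the shuffle-and-batch above, sketched against the TF 1.x API with zero-filled arrays standing in for data.train.images/labels (an untested sketch, not a drop-in replacement):

import numpy as np
import tensorflow as tf

x_train = np.zeros((60000, 28, 28, 1), dtype=np.float32)  # placeholder data
y_train = np.zeros((60000,), dtype=np.int32)

dataset = (tf.data.Dataset.from_tensor_slices((x_train, y_train))
           .shuffle(buffer_size=3000)  # plays the role of min_after_dequeue
           .batch(100)
           .repeat())
x_batch, y_batch = dataset.make_one_shot_iterator().get_next()
y_batch = tf.one_hot(y_batch, 10)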
    stop = timeit.default_timer()
    run_time = stop - start

    correct_predict = (y_test == predicted_y_test).astype(np.int32).sum()
    incorrect_predict = len(y_test) - correct_predict
    accuracy = float(correct_predict) / len(y_test)

    print("Result for {}: ".format(algorithm_name))
    print("Correct Predict: {}/{} total \tAccuracy: {:5f} \tTime: {:2f}".format(
        correct_predict, len(y_test), accuracy, run_time))
    return correct_predict, accuracy, run_time


if __name__ == '__main__':
    # Read MNIST dataset
    mnist = read_data_sets("data", one_hot=False)

    result = [
        OrderedDict(
            first_name='Insert your First name here',
            last_name='Insert your Last name here',
        )
    ]

    for algorithm in [svm, exponential, knn]:
        x_valid, y_valid = mnist.validation.images, mnist.validation.labels
        # kNN is slow, so evaluate it on a 1000-example subset only.
        if algorithm.__name__ == 'knn':
            x_valid, y_valid = x_valid[:1000], y_valid[:1000]
        correct_predict, accuracy, run_time = run(
            algorithm, x_valid, y_valid, algorithm_name=algorithm.__name__)
        result.append(OrderedDict(
            algorithm_name=algorithm.__name__,
            correct_predict=correct_predict,
            accuracy=accuracy,
            run_time=run_time,
        ))
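The 1000-example cap for knn above exists because brute-force nearest neighbours pays for one distance per (query, train) pair. A minimal sketch of such a classifier, a hypothetical knn_predict helper rather than the knn function imported above:

import numpy as np

def knn_predict(x_train, y_train, x_query, k=3):
    # One [num_query, num_train] squared-distance matrix via the expansion
    # |a - b|^2 = |a|^2 - 2ab + |b|^2, avoiding a 3-D broadcast.
    d2 = ((x_query ** 2).sum(axis=1)[:, None]
          - 2.0 * x_query @ x_train.T
          + (x_train ** 2).sum(axis=1)[None, :])
    nearest = np.argsort(d2, axis=1)[:, :k]
    # Majority vote among the k nearest labels for each query row.
    return np.array([np.bincount(y_train[idx]).argmax() for idx in nearest])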
import numpy as np
import matplotlib.pyplot as plt
# https://stackoverflow.com/questions/21784641/installation-issue-with-matplotlib-python
# https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/tutorials/mnist/input_data.py
from tensorflow.contrib.learn.python.learn.datasets.mnist import read_data_sets

# mypath = "/Users/wanchang/Downloads/AbelProject/" + \
#          "MachineLearning/Deep Learning with TensorFlow/" + \
#          "ch3Using TF on a FeedForward Neural Network/MNIST_data/"
mypath = "../../DataSet/MNIST_data/"
__input = read_data_sets(mypath)

print("__input.train.images.shape=", __input.train.images.shape)
print("__input.train.labels.shape=", __input.train.labels.shape)
print("__input.test.images.shape=", __input.test.images.shape)
print("__input.test.labels.shape=", __input.test.labels.shape)

# Show the first training image with its label.
image_0 = __input.train.images[0]
image_0 = np.resize(image_0, (28, 28))
label_0 = __input.train.labels[0]
print('label_0 ->', label_0)

plt.imshow(image_0, cmap='Greys_r')
plt.show()
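One caveat in the snippet above: np.resize silently tiles or truncates the data when the element counts disagree, whereas ndarray.reshape raises, so reshape is the safer call when the 784 pixels are expected to fill 28x28 exactly. A quick demonstration:

import numpy as np

flat = np.arange(784)
print(np.resize(flat, (28, 28)).shape)    # (28, 28) -- the same 784 values
print(flat.reshape((28, 28)).shape)       # (28, 28) -- would error on a size mismatch

short = np.arange(10)
print(np.resize(short, (28, 28))[0, :5])  # tiles the data: [0 1 2 3 4]
# short.reshape((28, 28)) would raise ValueError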
def MNIST_must_converge(name,
                        model_class,
                        optimizer_class,
                        epochs=50,
                        batch_size=32,
                        initial_learning_rate=0.001,
                        summaries=False,
                        use_debug_session=False):

    def train(epoch, dataset, batch_size, total, sess):
        average_loss = []
        average_error = []
        eye = np.eye(10)
        total_examples = 0
        global_step = 0  # guards the early return before the first sess.run

        def step_decay(epoch):
            # Halve the learning rate every `epochs_drop` epochs.
            initial_lrate = initial_learning_rate
            drop = 0.5
            epochs_drop = 10.0
            lrate = initial_lrate * math.pow(
                drop, math.floor((1 + epoch) / epochs_drop))
            return lrate

        learning_rate = initial_learning_rate  # step_decay(epoch) is defined but disabled

        while total_examples <= total:
            x_batch, label_batch = dataset.next_batch(batch_size)
            total_examples += len(x_batch)
            # one hot encode
            y_batch = eye[label_batch]

            feed_dict = {
                train_strategy.inputs: x_batch,
                train_strategy.labels: y_batch,
                train_strategy.learning_rate: learning_rate,
                train_strategy.batch_size: batch_size,
                "global_is_training:0": True,
            }
            feed_dict.update(train_strategy.train_parameters)

            fetches = [train_strategy.optimize,
                       train_strategy.loss,
                       train_strategy.global_step,
                       train_strategy.predictions,
                       train_strategy.categorical_error]

            if sess.should_stop():
                return global_step, np.mean(average_loss), np.mean(average_error) * 100.

            _, loss, global_step, predictions, error = sess.run(
                fetches, feed_dict=feed_dict)

            average_loss.append(loss)
            average_error.append(error)
            err = np.mean(average_error) * 100.0

            if summaries and (total_examples % 10 == 0):
                fetches = train_strategy.summary_op
                summary = sess.run(fetches, feed_dict=feed_dict)
                train_writer.add_summary(summary)
                train_writer.flush()
                print("writing summaries")

        return global_step, np.mean(average_loss), err

    def test(dataset, batch_size, total, sess):
        total_examples = 0
        average_error = []
        eye = np.eye(10)
        while total_examples <= total:
            x_batch, label_batch = dataset.next_batch(batch_size)
            total_examples += len(x_batch)
            feed_dict = {
                train_strategy.inputs: x_batch,
                train_strategy.labels: eye[label_batch],
                train_strategy.batch_size: batch_size,
                "global_is_training:0": False,
            }
            fetches = [train_strategy.predictions,
                       train_strategy.categorical_error]
            predictions, error = sess.run(fetches, feed_dict=feed_dict)
            average_error.append(error)
        err = np.mean(average_error) * 100.0
        print("test err: %f" % err)
        return err

    # Run test on the test set at the end, just before closing the session.
    class TestAtEnd(tf.train.StopAtStepHook):
        def __init__(self, last_step):
            tf.train.StopAtStepHook.__init__(self, last_step=last_step)

        def end(self, session):
            print("computing final test error")
            test(dataset.test, batch_size, dataset.test.num_examples, session)

    print("Testing %s" % name)

    train_strategy = generate_train_graph(
        model_class, optimizer_class, 28, 28, 1, 10, add_summaries=summaries)

    timestamp = datetime.now().strftime("%y%m%d%H%M%S")
    train_writer = tf.summary.FileWriter("/tmp/%s/train/%s" % (name, timestamp))

    class _LoggerHook(tf.train.SessionRunHook):
        """Logs loss and runtime."""

        def begin(self):
            self._step = -1

        def before_run(self, run_context):
            self._step += 1
            self._start_time = time.time()
            return tf.train.SessionRunArgs(train_strategy.loss)  # Asks for loss value.
        def after_run(self, run_context, run_values):
            duration = time.time() - self._start_time
            loss_value = run_values.results
            # `print_logs` is assumed to be a module-level flag; it is not
            # defined in this snippet.
            if self._step % 10 == 0 and print_logs:
                num_examples_per_step = batch_size
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)
                format_str = ('%s: step %d, loss = %.2f '
                              '(%.1f examples/sec; %.3f sec/batch)')
                print(format_str % (datetime.now(), self._step, loss_value,
                                    examples_per_sec, sec_per_batch))

    with train_strategy.graph.as_default():
        dataset = read_data_sets('/tmp/deepwater/datasets/', validation_size=0)

        checkpoint_directory = "/tmp/checkpoint"
        checkpoint_file = checkpoint_directory + "/checkpoint"
        if os.path.isfile(checkpoint_file):
            os.remove(checkpoint_file)

        start_time = time.time()

        config = tf.ConfigProto(allow_soft_placement=True,
                                log_device_placement=False)
        config.gpu_options.allow_growth = True

        with tf.train.MonitoredTrainingSession(
                checkpoint_dir=checkpoint_directory,
                hooks=[TestAtEnd(epochs * dataset.train.num_examples),
                       _LoggerHook()],
                config=config) as sess:

            epoch = 0
            tf.set_random_seed(12345678)

            if use_debug_session:
                from tensorflow.python import debug as tf_debug
                sess = tf_debug.LocalCLIDebugWrapperSession(sess)
                sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)

            if not use_debug_session:
                print('computing initial test error')
                test_error = test(dataset.test, batch_size,
                                  dataset.test.num_examples, sess)
                print('initial test error: %f' % test_error)

            while not sess.should_stop() and epoch < epochs:
                print('epoch: %d' % epoch)
                epoch += 1
                global_step, train_loss, train_error = train(
                    epoch, dataset.train, batch_size,
                    dataset.train.num_examples, sess)
                print("train: avg loss %f error %f" % (train_loss, train_error))

        train_writer.close()

        elapsed_time = time.time() - start_time
        print("time %.2f s\n" % elapsed_time)
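The disabled step_decay above halves the learning rate every epochs_drop epochs; because of the (1 + epoch) offset, the first halving lands at epoch 9 rather than 10. A standalone check of the schedule it would produce:

import math

def step_decay(epoch, initial_lrate=0.001, drop=0.5, epochs_drop=10.0):
    return initial_lrate * math.pow(drop, math.floor((1 + epoch) / epochs_drop))

for epoch in (0, 9, 10, 19, 20, 49):
    print("epoch %2d -> lr %.6f" % (epoch, step_decay(epoch)))
# epoch  0 -> lr 0.001000
# epoch  9 -> lr 0.000500
# epoch 10 -> lr 0.000500
# epoch 19 -> lr 0.000250
# epoch 20 -> lr 0.000250
# epoch 49 -> lr 0.000031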