def main(args):
    """Evaluate the LeNet model on the MNIST test split.

    Streams accuracy and mean-squared-error metrics over checkpoints
    written to FLAGS.checkpoint_dir, re-evaluating every
    FLAGS.eval_interval_secs seconds.
    """
    # Load the MNIST test split.
    dataset = mnist.get_split('test', FLAGS.data_dir)

    # Load a batch (no training-time preprocessing/shuffling for eval).
    images, labels = load_batch(
        dataset, FLAGS.batch_size, is_training=False)

    # Forward pass, then reduce per-class scores to a single class id.
    predictions = lenet(images)
    predictions = tf.to_int64(tf.argmax(predictions, 1))

    # Streaming metrics accumulated across evaluation batches.
    metrics_to_values, metrics_to_updates = metrics.aggregate_metric_map({
        'mse': metrics.streaming_mean_squared_error(predictions, labels),
        'accuracy': metrics.streaming_accuracy(predictions, labels),
    })

    # Export each metric value as a scalar summary.
    # BUG FIX: dict.iteritems() does not exist in Python 3; items() works
    # on both Python 2 and 3.
    for metric_name, metric_value in metrics_to_values.items():
        tf.summary.scalar(metric_name, metric_value)

    # Evaluate the model saved in the checkpoint directory, repeating
    # every eval_interval_secs.
    # BUG FIX: wrap .values() in list() — Python 3 returns a view object,
    # while slim's eval_op expects a list of update ops.
    slim.evaluation.evaluation_loop(
        '',
        FLAGS.checkpoint_dir,
        FLAGS.log_dir,
        num_evals=FLAGS.num_evals,
        eval_op=list(metrics_to_updates.values()),
        eval_interval_secs=FLAGS.eval_interval_secs)
def main(params):
    """Train LeNet on MNIST for 100 epochs, checking accuracy on the test set.

    `params` must provide 'lr' (learning rate) and 'batch_size'.
    """
    # Fetch the MNIST splits; the validation split is unpacked but unused here.
    train_set, valid_set, test_set = df.datasets.mnist.data()
    x_train, y_train = train_set
    x_test, y_test = test_set

    # Network, loss, and optimiser.
    net = lenet()
    loss_fn = df.ClassNLLCriterion()
    opt = df.SGD(lr=params['lr'])

    batch_size = params['batch_size']
    for epoch in range(100):
        # One optimisation pass over the training data, then a
        # statistics-only pass.
        net.training()
        train(x_train, y_train, net, opt, loss_fn, epoch, batch_size, 'train')
        train(x_train, y_train, net, opt, loss_fn, epoch, batch_size, 'stats')

        # Switch to evaluation mode before measuring test performance.
        net.evaluate()
        validate(x_test, y_test, net, epoch, batch_size)
def main(args):
    """Build the LeNet training graph on MNIST and run slim's training loop."""
    # Training split and one input batch with training-time preprocessing.
    split = mnist.get_split('train', FLAGS.data_dir)
    batch_images, batch_labels = load_batch(
        split, FLAGS.batch_size, is_training=True)

    # Forward pass through the network.
    logits = lenet(batch_images)

    # Softmax cross-entropy against one-hot targets; slim collects it into
    # the total loss, which we also export as a summary.
    targets = slim.one_hot_encoding(batch_labels, split.num_classes)
    slim.losses.softmax_cross_entropy(logits, targets)
    total_loss = slim.losses.get_total_loss()
    tf.summary.scalar('loss', total_loss)

    # RMSProp optimizer and the combined train op (with gradient summaries).
    rmsprop = tf.train.RMSPropOptimizer(0.001, 0.9)
    train_op = slim.learning.create_train_op(
        total_loss, rmsprop, summarize_gradients=True)

    # Run training, writing summaries every 20 seconds.
    slim.learning.train(
        train_op,
        FLAGS.log_dir,
        save_summaries_secs=20)
from model import lenet FLAGS = tf.app.flags.FLAGS # tf.app.flags.DEFINE_integer('max_steps', 1000000, # """Number of iterations to run for each workers.""") tf.app.flags.DEFINE_integer( 'max_steps', 1000, """Number of iterations to run for each workers.""") tf.app.flags.DEFINE_integer('log_frequency', 50, """How many steps between two runop logs.""") tf.app.flags.DEFINE_integer('batch_size', 32, """Batch size""") mnist = input_data.read_data_sets('MNIST_data', one_hot=True) ops = lenet() train_op = ops['train_op'] loss = ops['loss'] acc = ops['acc'] x = ops['images'] y = ops['labels'] is_training = ops['is_training'] with tf.Session() as sess: sess.run(tf.global_variables_initializer()) start = time.time() for i in range(FLAGS.max_steps): batch = mnist.train.next_batch(FLAGS.batch_size) _, loss_ = sess.run([train_op, loss], feed_dict={ x: batch[0],
# Model definition and the MNIST input pipeline.
from model import lenet
from tensorflow.examples.tutorials.mnist import input_data

# Initialise Horovod before any other distributed setup.
hvd.init()

# Command-line flags for the training run.
FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_integer(
    'max_steps', 1000000,
    """Number of iterations to run for each workers.""")
tf.app.flags.DEFINE_integer('log_frequency', 50,
                            """How many steps between two runop logs.""")
tf.app.flags.DEFINE_integer('batch_size', 32, """Batch size""")

mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

# Build the network up to the logits only; loss and metrics are defined below.
ops = lenet(only_logits=True)
logits = ops['logits']
x = ops['images']            # input image placeholder
y = ops['labels']            # one-hot label placeholder
is_training = ops['is_training']
global_step = ops['global_step']

# Softmax cross-entropy plus L2 regularisation scaled by the model's
# weight decay.
# NOTE(review): `model` is referenced here but only `from model import lenet`
# is visible above — assumes a plain `import model` exists elsewhere in this
# file and that the module exposes `weight_decay` and `learning_rate`; confirm.
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=logits))
loss += model.weight_decay * tf.losses.get_regularization_loss()

# Fraction of examples whose arg-max prediction matches the one-hot label.
acc = tf.reduce_mean(
    tf.cast(tf.equal(tf.argmax(logits, axis=1), tf.argmax(y, axis=1)),
            tf.float32))

# Adam wrapped by Horovod so gradients are averaged across workers.
optimizer = tf.train.AdamOptimizer(learning_rate=model.learning_rate)
optimizer = hvd.DistributedOptimizer(optimizer)
def train():
    """Train LeNet on MNIST, logging summaries, validating, and
    checkpointing once per epoch.

    Reads all hyperparameters from the `cfg` module; writes TensorBoard
    logs under logs/train/ and logs/val/, and checkpoints under checkpoint/.
    """
    mnist = input_data.read_data_sets('MNIST_data/', one_hot=True)
    batch_size = cfg.BATCH_SIZE
    learning_rate = cfg.LEARNING_RATE
    parameter_path = cfg.PARAMETER_FILE
    # NOTE(review): cfg.MAX_ITER is used as an epoch count here, despite
    # the name suggesting an iteration count — confirm intent.
    num_epoches = cfg.MAX_ITER

    # Placeholders: NHWC grayscale images, one-hot labels, dropout keep prob.
    x = tf.placeholder(tf.float32, [None, 28, 28, 1])
    y = tf.placeholder(tf.float32, [None, cfg.NUM_CLASSES])
    keep_prob = tf.placeholder(tf.float32)

    generateVariables()
    model = lenet()
    logits = model.build(x, keep_prob)
    loss_op = loss(logits, y)
    optimizer_op = optimizer(loss_op, cfg.LEARNING_RATE)
    acc = accuracy(logits, y)

    # Summaries
    tf.summary.scalar('train_loss', loss_op)
    tf.summary.scalar('train_accuracy', acc)
    merged_summary = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter('logs/train/')
    val_writer = tf.summary.FileWriter('logs/val/')
    saver = tf.train.Saver()

    # Get the number of training/validation steps per epoch
    train_batches_per_epoch = np.floor(
        mnist.train.labels.size / batch_size).astype(np.int16)
    val_batches_per_epoch = np.floor(
        mnist.test.labels.size / batch_size).astype(np.int16)

    with tf.Session() as sess:
        with tf.device("/cpu:0"):
            sess.run(tf.global_variables_initializer())
            train_writer.add_graph(sess.graph)

            for epoch in range(num_epoches):
                print("{} Epoch number: {}".format(
                    datetime.datetime.now(), epoch + 1))

                # NOTE(review): starting at 1 with `step <
                # train_batches_per_epoch` runs one fewer batch than the
                # computed batches-per-epoch — confirm whether intended.
                step = 1
                while step < train_batches_per_epoch:
                    batch = mnist.train.next_batch(batch_size)
                    batch_x = np.reshape(batch[0], [-1, 28, 28, 1])
                    batch_y = np.reshape(batch[1], [-1, cfg.NUM_CLASSES])
                    # One optimisation step with dropout enabled.
                    sess.run(optimizer_op, feed_dict={
                        x: batch_x,
                        y: batch_y,
                        keep_prob: cfg.KEEP_PROB
                    })
                    # Logging: every 100 steps, re-run the last batch with
                    # dropout disabled (keep_prob=1.0) for accuracy/summaries.
                    if (step + 1) % 100 == 0:
                        train_acc = sess.run(acc, feed_dict={
                            x: batch_x,
                            y: batch_y,
                            keep_prob: 1.0
                        })
                        print('step %d, training accuracy %g' %
                              (step + 1, train_acc))
                        s = sess.run(merged_summary, feed_dict={
                            x: batch_x,
                            y: batch_y,
                            keep_prob: 1.0
                        })
                        train_writer.add_summary(
                            s, epoch * train_batches_per_epoch + step)
                    step += 1

                # Epoch completed, start validation
                print("{} Start validation".format(datetime.datetime.now()))
                test_acc = 0.
                test_count = 0
                # Average batch accuracies over the test set.
                for _ in range(val_batches_per_epoch):
                    batch = mnist.test.next_batch(batch_size)
                    batch_x = np.reshape(batch[0], [-1, 28, 28, 1])
                    batch_y = np.reshape(batch[1], [-1, cfg.NUM_CLASSES])
                    val_acc = sess.run(acc, feed_dict={
                        x: batch_x,
                        y: batch_y,
                        keep_prob: 1.0
                    })
                    test_acc += val_acc
                    test_count += 1
                test_acc /= test_count

                # Hand-built summary so validation accuracy appears in
                # TensorBoard without a graph op.
                s = tf.Summary(value=[
                    tf.Summary.Value(tag='validation_accuracy',
                                     simple_value=test_acc)
                ])
                val_writer.add_summary(s, epoch + 1)
                print("{} Validation Accuracy = {:.4f}".format(
                    datetime.datetime.now(), test_acc))

                # Checkpoint the model at the end of every epoch.
                checkpoint_path = os.path.join(
                    'checkpoint/', 'model_epoch' + str(epoch + 1) + '.ckpt')
                save_path = saver.save(sess, checkpoint_path)
                print("{} Model checkpoint saved at {}".format(
                    datetime.datetime.now(), checkpoint_path))
def __init__(self):
    """Construct the LeNet graph, open a TensorFlow session, and record
    the parameter-file path from config."""
    # Network definition; graph ops are created here.
    self.lenet = lenet()
    # Session used to run the graph.
    self.sess = tf.Session()
    # Location of the saved parameters (from config).
    self.parameter_path = cfg.PARAMETER_FILE