Example 1
def main(args):
    # load the dataset
    dataset = mnist.get_split('test', FLAGS.data_dir)

    # load batch
    images, labels = load_batch(
        dataset,
        FLAGS.batch_size,
        is_training=False)

    # get the model prediction
    predictions = lenet(images)

    # convert prediction values for each class into single class prediction
    predictions = tf.argmax(predictions, 1)

    # streaming metrics to evaluate
    metrics_to_values, metrics_to_updates = metrics.aggregate_metric_map({
        'mse': metrics.streaming_mean_squared_error(predictions, labels),
        'accuracy': metrics.streaming_accuracy(predictions, labels),
    })

    # write the metrics as summaries
    for metric_name, metric_value in metrics_to_values.items():
        tf.summary.scalar(metric_name, metric_value)

    # evaluate on the model saved at the checkpoint directory
    # evaluate every eval_interval_secs
    slim.evaluation.evaluation_loop(
        '',
        FLAGS.checkpoint_dir,
        FLAGS.log_dir,
        num_evals=FLAGS.num_evals,
        eval_op=list(metrics_to_updates.values()),
        eval_interval_secs=FLAGS.eval_interval_secs)
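
# load_batch is not shown in the original; a minimal sketch (an assumption)
# built on slim's DatasetDataProvider, matching how it is called here and in
# Example 3
def load_batch(dataset, batch_size, height=28, width=28, is_training=False):
    provider = slim.dataset_data_provider.DatasetDataProvider(
        dataset, shuffle=is_training)
    image, label = provider.get(['image', 'label'])
    image = tf.image.convert_image_dtype(image, tf.float32)
    image = tf.image.resize_images(image, [height, width])
    images, labels = tf.train.batch(
        [image, label], batch_size=batch_size, capacity=2 * batch_size)
    return images, labels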
Example 2
def main(params):
    train_set, valid_set, test_set = df.datasets.mnist.data()
    # note: valid_set is unpacked but unused; validation below runs on the test set
    train_set_x, train_set_y = train_set
    test_set_x, test_set_y = test_set

    model = lenet()
    criterion = df.ClassNLLCriterion()
    optimiser = df.SGD(lr=params['lr'])

    for epoch in range(100):
        model.training()
        train(train_set_x, train_set_y, model, optimiser, criterion, epoch, params['batch_size'], 'train')

        # second pass in 'stats' mode: presumably recomputes running
        # statistics (e.g. batch norm) without updating the weights
        train(train_set_x, train_set_y, model, optimiser, criterion, epoch, params['batch_size'], 'stats')
        model.evaluate()
        validate(test_set_x, test_set_y, model, epoch, params['batch_size'])
Example 3
def main(args):
    # load the dataset
    dataset = mnist.get_split('train', FLAGS.data_dir)

    # load batch of dataset
    images, labels = load_batch(
        dataset,
        FLAGS.batch_size,
        is_training=True)

    # run the image through the model
    predictions = lenet(images)

    # get the cross-entropy loss
    one_hot_labels = slim.one_hot_encoding(
        labels,
        dataset.num_classes)
    slim.losses.softmax_cross_entropy(
        predictions,
        one_hot_labels)
    total_loss = slim.losses.get_total_loss()
    tf.summary.scalar('loss', total_loss)

    # use RMSProp to optimize
    optimizer = tf.train.RMSPropOptimizer(0.001, 0.9)

    # create train op
    train_op = slim.learning.create_train_op(
        total_loss,
        optimizer,
        summarize_gradients=True)

    # run training
    slim.learning.train(
        train_op,
        FLAGS.log_dir,
        save_summaries_secs=20)
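
# the FLAGS referenced above are assumed to be defined elsewhere in the script;
# a plausible sketch (an assumption) in the tf.app.flags style of Examples 4-5
FLAGS = tf.app.flags.FLAGS

tf.app.flags.DEFINE_string('data_dir', 'MNIST_data', """Dataset directory.""")
tf.app.flags.DEFINE_string('log_dir', 'logs/',
                           """Directory for summaries and checkpoints.""")
tf.app.flags.DEFINE_integer('batch_size', 32, """Batch size.""")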
Example 4
import time

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

from model import lenet

FLAGS = tf.app.flags.FLAGS

tf.app.flags.DEFINE_integer(
    'max_steps', 1000, """Number of iterations to run for each worker.""")
tf.app.flags.DEFINE_integer('log_frequency', 50,
                            """Number of steps between two log outputs.""")
tf.app.flags.DEFINE_integer('batch_size', 32, """Batch size""")

mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

ops = lenet()
train_op = ops['train_op']
loss = ops['loss']
acc = ops['acc']
x = ops['images']
y = ops['labels']
is_training = ops['is_training']

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    start = time.time()
    for i in range(FLAGS.max_steps):
        batch = mnist.train.next_batch(FLAGS.batch_size)
        # the original snippet is truncated mid-call; a plausible completion
        # (an assumption) feeds the labels, enables training mode, and logs
        # every log_frequency steps
        _, loss_ = sess.run([train_op, loss],
                            feed_dict={
                                x: batch[0],
                                y: batch[1],
                                is_training: True,
                            })
        if (i + 1) % FLAGS.log_frequency == 0:
            elapsed = time.time() - start
            print('step %d: loss = %.4f (%.1f steps/sec)' %
                  (i + 1, loss_, FLAGS.log_frequency / elapsed))
            start = time.time()
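
    # a hedged follow-up, not in the original snippet: once training finishes,
    # measure accuracy on the held-out test set in the same session
    test_acc = sess.run(acc, feed_dict={x: mnist.test.images,
                                        y: mnist.test.labels,
                                        is_training: False})
    print('test accuracy: %.4f' % test_acc)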
Example 5
import tensorflow as tf
import horovod.tensorflow as hvd

import model
from model import lenet
from tensorflow.examples.tutorials.mnist import input_data

hvd.init()

FLAGS = tf.app.flags.FLAGS

tf.app.flags.DEFINE_integer(
    'max_steps', 1000000, """Number of iterations to run for each worker.""")
tf.app.flags.DEFINE_integer('log_frequency', 50,
                            """Number of steps between two log outputs.""")
tf.app.flags.DEFINE_integer('batch_size', 32, """Batch size""")

mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

ops = lenet(only_logits=True)
logits = ops['logits']
x = ops['images']
y = ops['labels']
is_training = ops['is_training']
global_step = ops['global_step']

loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=logits))
loss += model.weight_decay * tf.losses.get_regularization_loss()
acc = tf.reduce_mean(
    tf.cast(tf.equal(tf.argmax(logits, axis=1), tf.argmax(y, axis=1)),
            tf.float32))

optimizer = tf.train.AdamOptimizer(learning_rate=model.learning_rate)
optimizer = hvd.DistributedOptimizer(optimizer)
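
# the snippet ends after wrapping the optimizer; a plausible continuation
# (an assumption, following the standard Horovod TF1 pattern) builds the train
# op and drives it from a MonitoredTrainingSession with the broadcast hook
train_op = optimizer.minimize(loss, global_step=global_step)

hooks = [
    # start every worker from the same initial variables
    hvd.BroadcastGlobalVariablesHook(0),
    # split the total step budget across workers
    tf.train.StopAtStepHook(last_step=FLAGS.max_steps // hvd.size()),
]

with tf.train.MonitoredTrainingSession(hooks=hooks) as sess:
    while not sess.should_stop():
        batch = mnist.train.next_batch(FLAGS.batch_size)
        sess.run(train_op,
                 feed_dict={x: batch[0], y: batch[1], is_training: True})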
Example 6
def train():
    mnist = input_data.read_data_sets('MNIST_data/', one_hot=True)
    batch_size = cfg.BATCH_SIZE
    learning_rate = cfg.LEARNING_RATE
    parameter_path = cfg.PARAMETER_FILE
    num_epochs = cfg.MAX_ITER
    x = tf.placeholder(tf.float32, [None, 28, 28, 1])
    y = tf.placeholder(tf.float32, [None, cfg.NUM_CLASSES])
    keep_prob = tf.placeholder(tf.float32)

    generateVariables()
    model = lenet()
    logits = model.build(x, keep_prob)
    loss_op = loss(logits, y)
    optimizer_op = optimizer(loss_op, learning_rate)

    acc = accuracy(logits, y)

    # Summaries
    tf.summary.scalar('train_loss', loss_op)
    tf.summary.scalar('train_accuracy', acc)
    merged_summary = tf.summary.merge_all()

    train_writer = tf.summary.FileWriter('logs/train/')
    val_writer = tf.summary.FileWriter('logs/val/')
    saver = tf.train.Saver()

    # Get the number of training/validation steps per epoch
    # labels are one-hot, so count examples via num_examples; labels.size
    # would over-count by a factor of NUM_CLASSES
    train_batches_per_epoch = mnist.train.num_examples // batch_size
    val_batches_per_epoch = mnist.test.num_examples // batch_size

    with tf.Session() as sess:
        with tf.device("/cpu:0"):
            sess.run(tf.global_variables_initializer())
            train_writer.add_graph(sess.graph)
            for epoch in range(num_epochs):
                print("{} Epoch number: {}".format(datetime.datetime.now(),
                                                   epoch + 1))
                step = 1
                while step <= train_batches_per_epoch:
                    batch = mnist.train.next_batch(batch_size)
                    batch_x = np.reshape(batch[0], [-1, 28, 28, 1])
                    batch_y = np.reshape(batch[1], [-1, cfg.NUM_CLASSES])
                    sess.run(optimizer_op,
                             feed_dict={
                                 x: batch_x,
                                 y: batch_y,
                                 keep_prob: cfg.KEEP_PROB
                             })

                    # Logging
                    if (step + 1) % 100 == 0:
                        train_acc = sess.run(acc,
                                             feed_dict={
                                                 x: batch_x,
                                                 y: batch_y,
                                                 keep_prob: 1.0
                                             })
                        print('step %d, training accuracy %g' %
                              (step + 1, train_acc))
                        s = sess.run(merged_summary,
                                     feed_dict={
                                         x: batch_x,
                                         y: batch_y,
                                         keep_prob: 1.0
                                     })
                        train_writer.add_summary(
                            s, epoch * train_batches_per_epoch + step)
                    step += 1

                # Epoch completed, start validation
                print("{} Start validation".format(datetime.datetime.now()))
                test_acc = 0.
                test_count = 0
                for _ in range(val_batches_per_epoch):
                    batch = mnist.test.next_batch(batch_size)
                    batch_x = np.reshape(batch[0], [-1, 28, 28, 1])
                    batch_y = np.reshape(batch[1], [-1, cfg.NUM_CLASSES])
                    val_acc = sess.run(acc,
                                       feed_dict={
                                           x: batch_x,
                                           y: batch_y,
                                           keep_prob: 1.0
                                       })
                    test_acc += val_acc
                    test_count += 1
                test_acc /= test_count
                s = tf.Summary(value=[
                    tf.Summary.Value(tag='validation_accuracy',
                                     simple_value=test_acc)
                ])
                val_writer.add_summary(s, epoch + 1)
                print("{} Validation Accuracy = {:.4f}".format(
                    datetime.datetime.now(), test_acc))

                checkpoint_path = os.path.join(
                    'checkpoint/', 'model_epoch' + str(epoch + 1) + '.ckpt')
                save_path = saver.save(sess, checkpoint_path)
                print("{} Model checkpoint saved at {}".format(
                    datetime.datetime.now(), checkpoint_path))
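
# the loss, optimizer, and accuracy helpers called in train() are not shown in
# the original; minimal sketches (assumptions) consistent with their call sites
def loss(logits, labels):
    return tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels, logits=logits))

def optimizer(loss_op, learning_rate):
    return tf.train.AdamOptimizer(learning_rate).minimize(loss_op)

def accuracy(logits, labels):
    correct = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
    return tf.reduce_mean(tf.cast(correct, tf.float32))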
Example 7
def __init__(self):
    self.lenet = lenet()
    self.sess = tf.Session()
    self.parameter_path = cfg.PARAMETER_FILE
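
# a plausible companion method (an assumption; the original excerpt shows only
# the constructor): restore the saved parameters into the live session
def restore(self):
    saver = tf.train.Saver()
    saver.restore(self.sess, self.parameter_path)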