def main(params):
    """Parse options, build the DANet model and hand everything to training.

    Args:
        params: Sequence of command-line argument strings forwarded to
            ``ArgumentParser.parse_args``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--num_epochs',
                        type=int,
                        default=300,
                        help='Number of epochs to train for')
    parser.add_argument('--epoch_start_i',
                        type=int,
                        default=0,
                        help='Start counting epochs from this number')
    parser.add_argument('--checkpoint_step',
                        type=int,
                        default=5,
                        help='How often to save checkpoints (epochs)')
    parser.add_argument('--learning_rate',
                        type=float,
                        default=0.01,
                        help='learning rate used for train')
    parser.add_argument('--cuda',
                        type=str,
                        default='0',
                        help='GPU ids used for training')
    parser.add_argument('--save_model_path',
                        type=str,
                        default=None,
                        help='path to save model')
    parser.add_argument('--pretrained_model_path',
                        type=str,
                        default=None,
                        help='path to pretrained model')

    args = parser.parse_args(params)

    # create dataset and dataloader
    # NOTE(review): `train` is called here as a dataset factory and at the
    # end of this function as the training entry point -- confirm both
    # call shapes are really served by the same name.
    dataloader_train = DataLoader(train(input_transform, target_transform),
                                  num_workers=1,
                                  batch_size=2,
                                  shuffle=True)

    # build model
    os.environ['CUDA_VISIBLE_DEVICES'] = args.cuda
    model = DANet(nclass=2, backbone='resnet50', aux=False, se_loss=False)
    model = model.cuda()

    # build optimizer
    optimizer = torch.optim.RMSprop(model.parameters(), args.learning_rate)

    # load pretrained model if exists
    if args.pretrained_model_path is not None:
        print('load model from %s ...' % args.pretrained_model_path)
        # The model built above is not wrapped in a DataParallel-style
        # container, so it has no `.module`; unwrap only when one exists.
        target = getattr(model, 'module', model)
        target.load_state_dict(torch.load(args.pretrained_model_path))
        print('Done!')
    # train
    train(args, model, optimizer, dataloader_train)
 def train_input_fn():
     """Build the training input pipeline.

     Reads ``data_path`` and ``params`` from the surrounding scope.

     Returns:
         The result of ``dataset.train(data_path)`` after ``shuffle``,
         ``take(5000)``, ``batch`` and ``repeat`` have been applied.
     """
     shuffled = dataset.train(data_path).shuffle(buffer_size=50000)
     # Only 5000 examples are kept, just to speed up training.
     subset = shuffled.take(5000)
     batched = subset.batch(params['batch_size'])
     return batched.repeat(params['nb_epochs'])
Exemple #3
0
    def __init__(self, backend):
        """Load the data, pick a device string and create the model.

        Args:
            backend: ``'gpu'`` or ``'cpu'``; any other value trips the
                assertion in the device selection below.
        """
        self.train_data = dataset.train("/tmp/mnist_data")
        self.test_data = dataset.test("/tmp/mnist_data")
        self.backend = backend

        train_pixels = self.train_data['images'][:TRAIN_SIZE]
        self.train_images = np.reshape(
            train_pixels, (-1, IMAGE_SIZE, IMAGE_SIZE, IMAGE_DEPTH))
        self.train_labels = self.train_data['labels'][:TRAIN_SIZE]

        # NOTE(review): the test split is truncated with TRAIN_SIZE as
        # well -- confirm that this is intentional.
        test_pixels = self.test_data['images'][:TRAIN_SIZE]
        self.test_images = np.reshape(
            test_pixels, (-1, IMAGE_SIZE, IMAGE_SIZE, IMAGE_DEPTH))
        self.test_labels = self.test_data['labels'][:TRAIN_SIZE]

        if backend != 'gpu':
            assert backend == 'cpu', 'Invalid backend specified: %s' % backend
            self.device = "/cpu:0"
        else:
            self.device = "/device:GPU:0"

        print("Creating model")

        self.model = MNIST.create_model()
Exemple #4
0
def main(_):
    """Train the model on one pass of the training data, logging progress.

    Runs at most ``max_steps`` optimisation steps and prints loss and
    accuracy every 10 steps. The input pipeline uses ``repeat(1)``, so
    training ends early, without raising, once the iterator is exhausted.

    Args:
        _: Ignored.
    """

    print('Loading dataset')
    mnist = input_data.read_data_sets(FLAGS.data_dir)
    print('%d train images' % mnist.train.num_examples)
    print('%d test images' % mnist.test.num_examples)

    batch_size = 128
    max_steps = 10000

    train_ds = dataset.train('/tmp/mnist')
    train_ds = train_ds.shuffle(buffer_size=50000)
    train_ds = train_ds.batch(batch_size)
    train_ds = train_ds.repeat(1)

    x, y = train_ds.make_one_shot_iterator().get_next()
    logits = build_model(x)

    cross_entropy = tf.losses.sparse_softmax_cross_entropy(labels=y,
                                                           logits=logits)
    loss = tf.reduce_mean(cross_entropy)
    train_op = tf.train.AdamOptimizer(1e-4).minimize(loss)

    correct_prediction = tf.cast(
        tf.equal(tf.argmax(logits, 1), tf.cast(y, tf.int64)), tf.float32)
    accuracy = tf.reduce_mean(correct_prediction)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        for i in range(max_steps):
            try:
                _, train_loss, train_acc = sess.run(
                    [train_op, loss, accuracy])
            except tf.errors.OutOfRangeError:
                # The one-shot iterator covers a single pass over the
                # data; stop cleanly instead of crashing when it runs dry
                # before max_steps is reached.
                print('dataset exhausted after %d steps' % i)
                break
            if i % 10 == 0:
                print('step %d: train_loss=%f train_acc=%g' %
                      (i, train_loss, train_acc))
 def train_input_fn():
   """Return the cached, shuffled and batched training dataset.

   Reads ``data_dir`` and ``batch_size`` from the surrounding scope.
   """
   cached = dataset.train(data_dir).cache()
   shuffled = cached.shuffle(buffer_size=50000)
   return shuffled.batch(batch_size).repeat(1)
Exemple #6
0
def train_data(params):
    """Build the training input pipeline from a parameter mapping.

    Args:
        params: Mapping providing ``'batch_size'`` and ``'data_dir'``.

    Returns:
        The ``dataset.train`` result after ``cache``, ``repeat``,
        ``shuffle`` and ``batch(..., drop_remainder=True)``.
    """
    batch_size, data_dir = params['batch_size'], params['data_dir']
    pipeline = dataset.train(data_dir).cache().repeat()
    pipeline = pipeline.shuffle(buffer_size=50000)
    # Incomplete trailing batches are dropped (the original author
    # flagged this choice as an open question).
    return pipeline.batch(batch_size, drop_remainder=True)
def main(unused):
    """Train and/or run prediction with the estimator, as chosen by FLAGS.

    ``FLAGS.mode`` selects the phases: ``'train'``, ``'predict'`` or
    ``'both'``.

    Args:
        unused: Ignored.
    """
    backend = "/device:GPU:0" if FLAGS.run_gpu else "/cpu:0"

    mnist_classifier = tf.estimator.Estimator(
        model_fn=cnn_model_fn,
        model_dir=FLAGS.model_dir,
        params={'backend': backend})

    if FLAGS.mode in ('train', 'both'):
        train_arrays = dataset.train(FLAGS.data_dir)

        # Feed the in-memory arrays to the estimator.
        train_input_fn = tf.estimator.inputs.numpy_input_fn(
            x={"x": train_arrays['images']},
            y=train_arrays['labels'],
            batch_size=FLAGS.batch_size,
            num_epochs=FLAGS.train_epochs,
            shuffle=True)

        mnist_classifier.train(input_fn=train_input_fn, steps=200)

    if FLAGS.mode in ('predict', 'both'):
        test_arrays = dataset.test(FLAGS.data_dir)

        # Prediction input: test images only, in their stored order.
        pred_input_fn = tf.estimator.inputs.numpy_input_fn(
            x={"x": test_arrays['images']}, shuffle=False)

        # NOTE(review): the value returned by predict() is discarded --
        # confirm whether the predictions should be consumed here.
        mnist_classifier.predict(input_fn=pred_input_fn)
 def train_input_fn():
     """Return the training dataset: cached, shuffled, batched, repeated.

     Batch size and epoch count come from ``FLAGS``.
     """
     # Larger shuffle buffers give better randomness, smaller ones use
     # less memory; a 50000-element buffer is used here.
     cached = dataset.train(FLAGS.data_dir).cache()
     shuffled = cached.shuffle(buffer_size=50000)
     batched = shuffled.batch(FLAGS.batch_size)
     return batched.repeat(FLAGS.train_epochs)
Exemple #9
0
 def train_input_fn():
   """Build the training pipeline from ``FLAGS`` and return the dataset."""
   source = dataset.train(FLAGS.data_dir)
   # Shuffle-buffer trade-off: bigger means better randomness, smaller
   # means less memory. 50000 elements are buffered here.
   shuffled = source.cache().shuffle(buffer_size=50000)
   batches = shuffled.batch(FLAGS.batch_size)
   return batches.repeat(FLAGS.train_epochs)
Exemple #10
0
 def train_input_fn():
     """Return one ``(images, labels)`` pair from a one-shot iterator.

     The dataset is cached, batched with ``FLAGS.batch_size`` and repeated
     ``FLAGS.train_epochs`` times. Shuffling is disabled in this variant.
     """
     cached = dataset.train(FLAGS.data_dir).cache()
     repeated = cached.batch(FLAGS.batch_size).repeat(FLAGS.train_epochs)
     iterator = repeated.make_one_shot_iterator()
     (features, targets) = iterator.get_next()
     return (features, targets)
Exemple #11
0
def eval_data(params):
    """Build the evaluation input pipeline from a parameter mapping.

    Args:
        params: Mapping providing ``'batch_size'`` and ``'data_dir'``.

    Returns:
        The ``dataset.train`` result after ``cache``, ``repeat``,
        ``shuffle`` and ``batch(..., drop_remainder=True)``.
    """
    batch_size, data_dir = params['batch_size'], params['data_dir']
    # NOTE(review): this reads from dataset.train although the function is
    # named eval_data and the original comment spoke of test data --
    # confirm which split is intended.
    source = dataset.train(data_dir)
    # The original author questioned whether this shuffle is too slow.
    shuffled = source.cache().repeat().shuffle(buffer_size=50000)
    return shuffled.batch(batch_size, drop_remainder=True)
 def train_input_fn():
   """Return ``(images, labels)`` and feed a second stream to count_epochs.

   Two independent one-shot iterators are created over the same dataset:
   the first provides the returned tensors, the images of the second are
   handed to ``count_epochs``.
   """
   # Larger shuffle buffers give better randomness, smaller ones use less
   # memory; 50000 elements are buffered here.
   shuffled = dataset.train(DATA_DIR).cache().shuffle(buffer_size=50000)
   pipeline = shuffled.batch(FLAGS.batch_size).repeat(FLAGS.num_epochs)
   make_iterator = tf.compat.v1.data.make_one_shot_iterator
   (features, targets) = make_iterator(pipeline).get_next()
   (counter_features, _counter_targets) = make_iterator(pipeline).get_next()
   count_epochs(counter_features)
   return (features, targets)
def train_input_fn(params):
    """Define the input pipeline used for training.

    Args:
        params: Mapping providing ``'batch_size'`` and ``'data_dir'``.

    Returns:
        The cached, repeated, shuffled and batched training dataset;
        incomplete batches are dropped.
    """
    # The batch size is the one for the current shard. The number of
    # shards is computed according to the input pipeline deployment. See
    # `tf.contrib.tpu.RunConfig` for details.
    batch_size, data_dir = params["batch_size"], params["data_dir"]
    repeated = dataset.train(data_dir).cache().repeat()
    shuffled = repeated.shuffle(buffer_size=50000)
    return shuffled.batch(batch_size, drop_remainder=True)
Exemple #14
0
def train_input_fn(params):
    """Return one ``(images, labels)`` pair from the training pipeline.

    Args:
        params: Mapping providing ``'batch_size'`` and ``'data_dir'``.
    """
    # The batch size is the one for the current shard. The number of
    # shards is computed according to the input pipeline deployment. See
    # `tf.contrib.tpu.RunConfig` for details.
    batch_size, data_dir = params["batch_size"], params["data_dir"]
    shuffled = dataset.train(data_dir).cache().repeat().shuffle(
        buffer_size=50000)
    batched = shuffled.apply(
        tf.contrib.data.batch_and_drop_remainder(batch_size))
    iterator = batched.make_one_shot_iterator()
    images, labels = iterator.get_next()
    return images, labels
Exemple #15
0
def train():
    """Train ``n`` on shuffled question batches, saving every tenth batch."""
    train_questions = list(questions_from_dataset(dataset.train()))
    random.shuffle(train_questions)
    # The test questions are loaded and shuffled as well, but nothing in
    # this function reads them afterwards.
    test_questions = list(questions_from_dataset(dataset.test()))
    random.shuffle(test_questions)

    batches = iterate_batches(train_questions, size=20)
    for batch_idx, batch in enumerate(batches):
        n.train(batch)
        if not batch_idx % 10:
            n.save(batch_idx)
Exemple #16
0
def train_input_fn(params):
  """Return one ``(images, labels)`` pair from the training pipeline.

  Args:
    params: Mapping providing ``"batch_size"`` and ``"data_dir"``.
  """
  # The batch size is the one for the current shard. The number of shards
  # is computed according to the input pipeline deployment. See
  # `tf.contrib.tpu.RunConfig` for details.
  batch_size, data_dir = params["batch_size"], params["data_dir"]
  source = dataset.train(data_dir).cache().repeat()
  shuffled = source.shuffle(buffer_size=50000)
  drop_partial = tf.contrib.data.batch_and_drop_remainder(batch_size)
  iterator = shuffled.apply(drop_partial).make_one_shot_iterator()
  images, labels = iterator.get_next()
  return images, labels
Exemple #17
0
def run_mnist_eager():
    """Run the per-rank MNIST train/eval loop in eager mode.

    Data and checkpoint directories are suffixed with ``ddl.rank()``.
    ``batch_size`` and ``train_epochs`` are read from the enclosing scope.
    Each epoch trains, evaluates on the test set and saves a checkpoint.
    """
    data_dir = '/tmp/tensorflow/mnist/input_data' + str(ddl.rank())
    model_dir = '/tmp/tensorflow/mnist/checkpoints/' + str(ddl.rank()) + '/'

    # Remove a stale checkpoint directory.
    # NOTE(review): the directory is per rank but is only removed when
    # local_rank() == 0 -- confirm that this is intended.
    if os.path.isdir(model_dir) and ddl.local_rank() == 0:
        shutil.rmtree(model_dir)

    data_format = 'channels_first'

    # Input pipelines: the training data is sharded across ranks.
    train_ds, _ = mnist_dataset.train(data_dir, (1, 28, 28), label_int=True)
    train_ds = train_ds.shard(ddl.size(), ddl.rank())
    train_ds = train_ds.shuffle(60000).batch(batch_size)
    test_ds, _ = mnist_dataset.test(data_dir, (1, 28, 28), label_int=True)
    test_ds = test_ds.batch(batch_size)

    # Model and optimizer.
    model = create_model(data_format)
    optimizer = tf.train.MomentumOptimizer(0.01, 0.5)

    # Both summary writers are created with a log directory of None.
    summary_writer = tf.contrib.summary.create_file_writer(
        None, flush_millis=10000)
    test_summary_writer = tf.contrib.summary.create_file_writer(
        None, flush_millis=10000, name='test')

    # Checkpointing: restore the latest checkpoint if one exists.
    checkpoint_prefix = os.path.join(model_dir, 'ckpt-r' + str(ddl.rank()))
    step_counter = tf.train.get_or_create_global_step()
    checkpoint = tf.train.Checkpoint(
        model=model, optimizer=optimizer, step_counter=step_counter)
    checkpoint.restore(tf.train.latest_checkpoint(model_dir))

    # Train and evaluate for the configured number of epochs.
    for _ in range(train_epochs):
        start = time.time()
        with summary_writer.as_default():
            train(model, optimizer, train_ds, step_counter, 10)
        elapsed = time.time() - start
        # Only rank 0 reports timing.
        if ddl.rank() == 0:
            print('\nTrain time for epoch #%d (%d total steps): %f' %
                  (checkpoint.save_counter.numpy() + 1, step_counter.numpy(),
                   elapsed))
        with test_summary_writer.as_default():
            test(model, test_ds)
        checkpoint.save(checkpoint_prefix)
Exemple #18
0
def main(_):
    """Train and evaluate the MNIST model in eager mode for 10 epochs.

    All settings come from ``FLAGS``. After each epoch the test set is
    evaluated and a checkpoint is saved.

    Args:
        _: Ignored.
    """
    tfe.enable_eager_execution()

    # Fall back to CPU / channels-last when GPUs are disabled or absent.
    if FLAGS.no_gpu or tfe.num_gpus() <= 0:
        device, data_format = '/cpu:0', 'channels_last'
    else:
        device, data_format = '/gpu:0', 'channels_first'
    print('Using device %s, and data format %s.' % (device, data_format))

    # Input pipelines.
    train_ds = dataset.train(FLAGS.data_dir).shuffle(60000)
    train_ds = train_ds.batch(FLAGS.batch_size)
    test_ds = dataset.test(FLAGS.data_dir).batch(FLAGS.batch_size)

    # Model and optimizer.
    model = mnist.Model(data_format)
    optimizer = tf.train.MomentumOptimizer(FLAGS.lr, FLAGS.momentum)

    # With an output directory, summaries go to its 'train' and 'eval'
    # sub-directories and can be viewed with
    # tensorboard --logdir=<output_dir>.
    train_dir = test_dir = None
    if FLAGS.output_dir:
        train_dir = os.path.join(FLAGS.output_dir, 'train')
        test_dir = os.path.join(FLAGS.output_dir, 'eval')
        tf.gfile.MakeDirs(FLAGS.output_dir)
    summary_writer = tf.contrib.summary.create_file_writer(
        train_dir, flush_millis=10000)
    test_summary_writer = tf.contrib.summary.create_file_writer(
        test_dir, flush_millis=10000, name='test')

    # Checkpointing: restore the latest checkpoint if one exists.
    checkpoint_prefix = os.path.join(FLAGS.checkpoint_dir, 'ckpt')
    step_counter = tf.train.get_or_create_global_step()
    checkpoint = tfe.Checkpoint(
        model=model, optimizer=optimizer, step_counter=step_counter)
    checkpoint.restore(tf.train.latest_checkpoint(FLAGS.checkpoint_dir))

    # Train and evaluate for 10 epochs.
    with tf.device(device):
        for _ in range(10):
            start = time.time()
            with summary_writer.as_default():
                train(model, optimizer, train_ds, step_counter,
                      FLAGS.log_interval)
            elapsed = time.time() - start
            print('\nTrain time for epoch #%d (%d total steps): %f' %
                  (checkpoint.save_counter.numpy() + 1, step_counter.numpy(),
                   elapsed))
            with test_summary_writer.as_default():
                test(model, test_ds)
            checkpoint.save(checkpoint_prefix)
    def train_input_fn():
        """Prepare data for training.

        Reads ``flags_obj`` from the surrounding scope.
        """
        # Larger shuffle buffers give better randomness, smaller ones use
        # less memory; 50000 elements are buffered here.
        cached = dataset.train(flags_obj.data_dir).cache()
        batched = cached.shuffle(buffer_size=50000).batch(
            flags_obj.batch_size)

        # Each training session walks the dataset `epochs_between_evals`
        # times.
        return batched.repeat(flags_obj.epochs_between_evals)
Exemple #20
0
def main(_):
  """Run 10 epochs of eager-mode MNIST training with per-epoch evaluation.

  Configuration is read from ``FLAGS``; a checkpoint is written after each
  epoch.

  Args:
    _: Ignored.
  """
  tfe.enable_eager_execution()

  # GPU with channels-first layout is the default; without a usable GPU
  # (or with --no_gpu) switch to CPU and channels-last.
  device, data_format = '/gpu:0', 'channels_first'
  cpu_only = FLAGS.no_gpu or tfe.num_gpus() <= 0
  if cpu_only:
    device, data_format = '/cpu:0', 'channels_last'
  print('Using device %s, and data format %s.' % (device, data_format))

  # Datasets.
  shuffled_train = dataset.train(FLAGS.data_dir).shuffle(60000)
  train_ds = shuffled_train.batch(FLAGS.batch_size)
  test_ds = dataset.test(FLAGS.data_dir).batch(FLAGS.batch_size)

  # Model and optimizer.
  model = mnist.Model(data_format)
  optimizer = tf.train.MomentumOptimizer(FLAGS.lr, FLAGS.momentum)

  # Summary directories (inspect with tensorboard --logdir=<output_dir>);
  # both stay None when no output directory is configured.
  train_dir, test_dir = None, None
  if FLAGS.output_dir:
    train_dir = os.path.join(FLAGS.output_dir, 'train')
    test_dir = os.path.join(FLAGS.output_dir, 'eval')
    tf.gfile.MakeDirs(FLAGS.output_dir)
  summary_writer = tf.contrib.summary.create_file_writer(
      train_dir, flush_millis=10000)
  test_summary_writer = tf.contrib.summary.create_file_writer(
      test_dir, flush_millis=10000, name='test')

  # Restore the most recent checkpoint when there is one.
  checkpoint_prefix = os.path.join(FLAGS.checkpoint_dir, 'ckpt')
  step_counter = tf.train.get_or_create_global_step()
  checkpoint = tfe.Checkpoint(
      model=model, optimizer=optimizer, step_counter=step_counter)
  checkpoint.restore(tf.train.latest_checkpoint(FLAGS.checkpoint_dir))

  # Train and evaluate for 10 epochs.
  with tf.device(device):
    for _ in range(10):
      began = time.time()
      with summary_writer.as_default():
        train(model, optimizer, train_ds, step_counter, FLAGS.log_interval)
      finished = time.time()
      epoch_no = checkpoint.save_counter.numpy() + 1
      print('\nTrain time for epoch #%d (%d total steps): %f' %
            (epoch_no, step_counter.numpy(), finished - began))
      with test_summary_writer.as_default():
        test(model, test_ds)
      checkpoint.save(checkpoint_prefix)
def train_input_fn(data_dir, batch_size=100):
  """Prepare data for training.

  Args:
    data_dir: Location handed to ``dataset.train``.
    batch_size: Batch size passed to ``batch``.

  Returns:
    A ``(features, labels)`` pair where ``features`` is a dict with the
    single key ``'pixels'``.
  """
  # Larger shuffle buffers give better randomness, smaller ones use less
  # memory; 50000 elements are buffered here.
  cached = dataset.train(data_dir).cache()
  batched = cached.shuffle(buffer_size=50000).batch(batch_size=batch_size)

  # The dataset is iterated 40 times during each training session.
  iterator = batched.repeat(40).make_one_shot_iterator()
  element = iterator.get_next()
  pixels = {'pixels': element[0]}
  return pixels, element[1]
def main(model_uri, data_path):
    """Load an MLflow TensorFlow model and print ten predictions.

    Args:
        model_uri: URI handed to ``mlflow.tensorflow.load_model``.
        data_path: Location handed to ``dataset.train``.
    """
    tf_graph = tf.Graph()
    with tf.Session(graph=tf_graph) as sess, tf_graph.as_default():

        # Re-use the dataset that was used for training.
        ds = dataset.train(data_path)
        next_op = tf.data.make_one_shot_iterator(ds).get_next()

        # Load the MLflow model into the session.
        signature_def = mlflow.tensorflow.load_model(model_uri=model_uri,
                                                     tf_sess=sess)

        # Map each signature tensor name to the tensor in the graph.
        input_tensors = {}
        for in_sig in signature_def.inputs.values():
            input_tensors[in_sig.name] = tf_graph.get_tensor_by_name(
                in_sig.name)
        output_tensors = {}
        for out_sig in signature_def.outputs.values():
            output_tensors[out_sig.name] = tf_graph.get_tensor_by_name(
                out_sig.name)

        for _ in range(10):
            # Two steps per example: fetch the next image with `next_op`,
            # then run the prediction through a feed dictionary. This is
            # for demonstration only; it is very inefficient and should
            # never be used in a real system.
            image, label = sess.run(next_op)
            feed_dict = {
                input_tensors['images:0']: np.expand_dims(image, axis=0)
            }
            predictions = sess.run(output_tensors['ArgMax:0'],
                                   feed_dict=feed_dict)
            pred = predictions[0]
            if label == pred:
                correct = 'SAME'
            else:
                correct = 'NOT_SAME'
            print(label, pred, correct)
Exemple #23
0
def run_mnist_eager(flags_obj):
    """Run the MNIST training and evaluation loop in eager mode.

    Args:
      flags_obj: An object containing parsed flag values.
    """
    # Enable eager execution with GPU memory growth switched on.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    tfe.enable_eager_execution(config=config)

    model_helpers.apply_clean(flags.FLAGS)

    # Pick device and data format automatically ...
    if flags_obj.no_gpu or not tf.test.is_gpu_available():
        device, data_format = '/cpu:0', 'channels_last'
    else:
        device, data_format = '/gpu:0', 'channels_first'

    # ... but an explicitly configured data format wins.
    if flags_obj.data_format is not None:
        data_format = flags_obj.data_format

    print('Using device %s, and data format %s.' % (device, data_format))

    # Input pipelines.
    train_ds = mnist_dataset.train(flags_obj.data_dir).shuffle(60000)
    train_ds = train_ds.batch(flags_obj.batch_size)
    test_ds = mnist_dataset.test(flags_obj.data_dir)
    test_ds = test_ds.batch(flags_obj.batch_size)

    # Model and optimizer.
    model = model_lib.create_model(data_format)
    optimizer = tf.train.MomentumOptimizer(flags_obj.lr, flags_obj.momentum)

    # Show the model summary.
    print(model.summary())

    # TensorBoard summary writers. With an output directory, summaries go
    # to its 'train' and 'eval' sub-directories and can be viewed with
    # tensorboard --logdir=<output_dir>.
    train_dir = test_dir = None
    if flags_obj.output_dir:
        train_dir = os.path.join(flags_obj.output_dir, 'train')
        test_dir = os.path.join(flags_obj.output_dir, 'eval')
        tf.gfile.MakeDirs(flags_obj.output_dir)

    summary_writer = tf.contrib.summary.create_file_writer(
        train_dir, flush_millis=10000)
    test_summary_writer = tf.contrib.summary.create_file_writer(
        test_dir, flush_millis=10000, name='test')

    # Checkpointing: restore the latest checkpoint if one exists.
    checkpoint_prefix = os.path.join(flags_obj.model_dir, 'ckpt')
    step_counter = tf.train.get_or_create_global_step()
    checkpoint = tf.train.Checkpoint(
        model=model, optimizer=optimizer, step_counter=step_counter)
    checkpoint.restore(tf.train.latest_checkpoint(flags_obj.model_dir))

    # Train and evaluate for the configured number of epochs.
    with tf.device(device):
        for _ in range(flags_obj.train_epochs):
            start = time.time()
            with summary_writer.as_default():
                train(model, optimizer, train_ds, step_counter,
                      flags_obj.log_interval)
            elapsed = time.time() - start

            # Report how long the epoch took.
            print('\nTrain time for epoch #%d (%d total steps): %f' %
                  (checkpoint.save_counter.numpy() + 1, step_counter.numpy(),
                   elapsed))
            with test_summary_writer.as_default():
                test(model, test_ds)
            checkpoint.save(checkpoint_prefix)
Exemple #24
0
import tensorflow as tf
# Switch TensorFlow to eager execution right after importing it.
tf.enable_eager_execution()

# Short alias for the contrib eager module.
tfe = tf.contrib.eager

import mnist

import dataset  # download dataset.py file
# Training pipeline: shuffle with a 60000-element buffer, repeat 4 times,
# then batch 32 elements at a time.
dataset_train = dataset.train('./datasets').shuffle(60000).repeat(4).batch(32)


def loss(model, x, y):
    """Return the sparse softmax cross-entropy of ``model(x)`` against ``y``.

    Args:
        model: Callable producing logits for ``x``.
        x: Model input.
        y: Labels passed as ``labels`` to the loss.
    """
    logits = model(x)
    return tf.losses.sparse_softmax_cross_entropy(labels=y, logits=logits)


def grad(model, inputs, targets):
    """Return the gradients of the loss with respect to ``model.variables``.

    Args:
        model: Model whose variables are differentiated.
        inputs: Batch handed to ``loss`` as input.
        targets: Labels handed to ``loss``.
    """
    # Record the forward pass so the tape can differentiate it afterwards.
    with tf.GradientTape() as recorder:
        current_loss = loss(model, inputs, targets)

    return recorder.gradient(current_loss, model.variables)


optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)

# Fetch one batch with the builtin next(): the iterator protocol on
# Python 3 only guarantees __next__, while calling `.next()` directly is a
# Python 2 idiom.
x, y = next(iter(dataset_train))
# NOTE(review): `model` is not defined in the visible part of this script
# -- confirm that it is created elsewhere.
print("Initial loss: {:.3f}".format(loss(model, x, y)))

# Training loop
for (i, (x, y)) in enumerate(dataset_train):
from keras.models import Sequential
#from sklearn.model_selection import train_test_split
import numpy as np

import dataset
import os

# Training configuration.
batch_size = 64
num_classes = 62
epochs = 10
img_rows, img_cols = 28, 28

print('Start loading data.')
# To be changed.
folder_path = os.getcwd()
# Build the path with os.path.join: the former '\emnist' literal relied on
# an invalid escape sequence and hard-coded the Windows path separator.
emnist_dir = os.path.join(folder_path, 'emnist')
train_dataset = dataset.train(emnist_dir)
test_dataset = dataset.test(emnist_dir)
print('Data has been loaded.')

# Not sure whether the x and y tensors holding train and test data need
# to be reshaped.
if K.image_data_format() == 'channels_first':
    #x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    #x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    #x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    #x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)
"""print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')"""
# Number of training epochs.
epochs = 12

# Input image dimensions.
img_rows, img_cols = 28, 28

# Data directory, suffixed with this process's DDL rank.
data_dir = "/tmp/mnist_convnet_model_data" + str(ddl.rank())

# Put the channel axis where the Keras backend expects it.
input_shape = ((1, img_rows, img_cols)
               if K.image_data_format() == 'channels_first'
               else (img_rows, img_cols, 1))

# Training data: shard across ranks, then cache, shuffle, batch, repeat.
train_set, num_of_train_imgs = dataset.train(data_dir, input_shape)
train_set = train_set.shard(ddl.size(), ddl.rank())
train_set = train_set.cache().shuffle(buffer_size=1000)
train_set = train_set.batch(batch_size).repeat()

# Evaluation data: keep the unsharded set in `eval_full`, then shard,
# batch and repeat the per-rank view.
eval_set, num_of_test_imgs = dataset.test(data_dir, input_shape)
eval_full = eval_set
eval_set = eval_set.shard(ddl.size(), ddl.rank()).batch(batch_size).repeat()

# Per-rank image counts; the total test count is kept before dividing.
# NOTE(review): `/=` is true division on Python 3, so the counts become
# floats -- confirm that downstream code accepts that.
num_of_all_test_imgs = num_of_test_imgs
num_of_train_imgs /= ddl.size()
num_of_test_imgs /= ddl.size()

model = Sequential()
model.add(
Exemple #27
0
def main(argv):
    """Parse the command line and run the eager-mode MNIST train/eval loop.

    Args:
        argv: Full argument vector; the first element is skipped before
            parsing.
    """
    flags = MNISTEagerArgParser().parse_args(args=argv[1:])

    # TF v1.7
    tfe.enable_eager_execution()

    # Pick device and data format automatically ...
    if flags.no_gpu or tfe.num_gpus() <= 0:
        device, data_format = '/cpu:0', 'channels_last'
    else:
        device, data_format = '/gpu:0', 'channels_first'
    # ... but an explicitly configured data format wins.
    if flags.data_format is not None:
        data_format = flags.data_format

    # Run banner.
    print("-" * 64)
    print("TEST INFO - EAGER")
    print("-" * 64)
    print("TF version:\t {}".format(tf.__version__))
    print("Eager execution:\t {}".format(tf.executing_eagerly()))
    print("Dataset:\t MNIST")
    print("Model:\t CNN")
    print('Device:\t {}'.format(device))

    if data_format != 'channels_first':
        print("Data format:\t NHWC (channel last)")
    else:
        print("Data format:\t NCHW (channel first)")

    print("=" * 64)

    # Input pipelines.
    train_ds = mnist_dataset.train(flags.data_dir).shuffle(60000)
    train_ds = train_ds.batch(flags.batch_size)
    test_ds = mnist_dataset.test(flags.data_dir).batch(flags.batch_size)

    # Model and optimizer.
    model = MNISTModel(data_format)
    optimizer = tf.train.MomentumOptimizer(flags.lr, flags.momentum)

    # TensorBoard summary writers. With an output directory, summaries go
    # to its 'train' and 'eval' sub-directories and can be viewed with
    # tensorboard --logdir=<output_dir>.
    train_dir = test_dir = None
    if flags.output_dir:
        train_dir = os.path.join(flags.output_dir, 'train')
        test_dir = os.path.join(flags.output_dir, 'eval')
        tf.gfile.MakeDirs(flags.output_dir)
    summary_writer = tf.contrib.summary.create_file_writer(
        train_dir, flush_millis=10000)
    test_summary_writer = tf.contrib.summary.create_file_writer(
        test_dir, flush_millis=10000, name='test')

    # Checkpointing: restore the latest checkpoint if one exists.
    checkpoint_prefix = os.path.join(flags.model_dir, 'ckpt')
    step_counter = tf.train.get_or_create_global_step()
    checkpoint = tfe.Checkpoint(
        model=model, optimizer=optimizer, step_counter=step_counter)
    checkpoint.restore(tf.train.latest_checkpoint(flags.model_dir))

    # Train and evaluate for the configured number of epochs.
    with tf.device(device):
        for _ in range(flags.train_epochs):
            start = time.time()
            with summary_writer.as_default():
                train(model, optimizer, train_ds, step_counter,
                      flags.log_interval)
            elapsed = time.time() - start
            print('\nTrain time for epoch #%d (%d total steps): %f' %
                  (checkpoint.save_counter.numpy() + 1, step_counter.numpy(),
                   elapsed))
            with test_summary_writer.as_default():
                test(model, test_ds)
            checkpoint.save(checkpoint_prefix)
def train_data():
    """Return the cached training dataset batched by ``FLAGS.batch_size``."""
    cached = dataset.train(FLAGS.data_dir).cache()
    return cached.batch(FLAGS.batch_size)
Exemple #29
0
import tensorflow as tf
import dataset

# Interactive session used by the rest of the script.
sess = tf.InteractiveSession()

# Train and test datasets, both read from ./mnist_data.
mnist_train = dataset.train("./mnist_data")
mnist_test = dataset.test("./mnist_data")

# Print the element shapes and types of both datasets.
# See https://www.tensorflow.org/guide/datasets
print(mnist_train.output_shapes, mnist_train.output_types)
print(mnist_test.output_shapes, mnist_test.output_types)
# Group the training data into batches of 100.
batched_train = mnist_train.batch(100)

# One-shot iterator over the batched training data; `next_element` is the
# op that yields the next batch.
iterator = batched_train.make_one_shot_iterator()
next_element = iterator.get_next()

# Softmax regression: 784 float inputs per example, 10 output classes,
# with weights and bias initialised to zero.
x = tf.placeholder(tf.float32, [None, 784])
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))

# y is a matrix with one row of 10 class probabilities per example.
y = tf.nn.softmax(tf.matmul(x, W) + b)

# Integer class labels, one per example.
# TODO: try [None, 10]
y_ = tf.placeholder(tf.int32, [None])
# TODO: find out why these alternatives do not work.
# y_ = tf.placeholder(tf.int32, [None, 1])
# y_ = tf.placeholder(tf.float32, [None, 10])

# Because y is a 2D matrix, the labels are one-hot encoded to depth 10;
# note the meaning of reduce_sum.
one_hot_y = tf.one_hot(y_, 10)
Exemple #30
0
def main(_):
    """Build the network on a DDL-sharded input pipeline and train it.

    Training runs until ``step * batch_size`` reaches
    ``FLAGS.num_iterations``. Loss and accuracy are printed every
    ``display_step`` steps, each line prefixed with the DDL rank.

    Args:
        _: Ignored.
    """
    # Hyper-parameters.
    learning_rate = 0.001
    training_iters = FLAGS.num_iterations
    batch_size = 100
    display_step = 1

    # Network parameters (not referenced again in this function).
    n_input = 784  # MNIST data input (img shape: 28*28)
    n_classes = 10  # MNIST total classes (0-9 digits)
    dropout = 0.75  # Dropout, probability to keep units

    # Input pipeline: per-local-rank data directory, sharded across ranks.
    data_dir = FLAGS.data_dir + str(ddl.local_rank())
    train_set, num_of_train_imgs = dataset.train(
        data_dir, (28, 28, 1), VARTYPE)
    train_set = train_set.shard(ddl.size(), ddl.rank())
    # Batching happens before cache/shuffle, so whole batches are shuffled.
    train_set = train_set.batch(batch_size).cache()
    train_set = train_set.shuffle(buffer_size=1000).repeat()

    X_train, Y_train = train_set.make_one_shot_iterator().get_next()

    # Model.
    pred, keep_prob = deepnn(X_train)

    # Loss and optimizer.
    with tf.name_scope('loss'):
        per_example = tf.nn.softmax_cross_entropy_with_logits_v2(
            labels=Y_train, logits=pred)
        cost = tf.reduce_mean(per_example)

    with tf.name_scope('adam_optimizer'):
        optimizer = tf.train.AdamOptimizer(
            learning_rate=learning_rate, epsilon=1e-4)
        objective = optimizer.minimize(cost)

    # Accuracy.
    with tf.name_scope('accuracy'):
        hits = tf.equal(tf.argmax(pred, 1), tf.argmax(Y_train, 1))
        correct_prediction = tf.cast(hits, VARTYPE)
        accuracy = tf.reduce_mean(correct_prediction)

    # Write the graph definition to a fresh temporary directory.
    graph_location = tempfile.mkdtemp()
    print('Saving graph to: %s' % graph_location)
    train_writer = tf.summary.FileWriter(graph_location)
    train_writer.add_graph(tf.get_default_graph())

    # Launch the graph.
    with tf.Session(config=tf.ConfigProto()) as sess:
        sess.run(tf.global_variables_initializer())
        my_variable = bias_variable([5, 5, 1, 32])
        sess.run(my_variable.initializer)

        # Keep training until the iteration budget is used up.
        step = 1
        while step * batch_size < training_iters:
            # One optimisation step (backprop).
            sess.run(objective)
            if step % display_step == 0:
                # Separate run call that computes batch loss and accuracy.
                loss, acc = sess.run([cost, accuracy])
                print("".join([
                    "DDL ", str(ddl.rank()), "] Iter ",
                    str(step * batch_size),
                    ", Minibatch Loss= ", "{:.6f}".format(loss),
                    ", Training Accuracy= ", "{:.5f}".format(acc),
                ]))
            step += 1

        print("DDL " + str(ddl.rank()) + "] Optimization Finished!")

        # `accuracy` is wired to the training iterator, so this reports
        # the accuracy on one more batch from the training pipeline.
        print("DDL " + str(ddl.rank()) + "] Testing Accuracy:",
              sess.run(accuracy))
Exemple #31
0
def train(model):
    """Train ``model`` with Adam for 60 epochs, checkpointing after each one.

    Side effects:
        * writes ``str(model)`` to ``./save/model.txt``;
        * appends one ``epoch / step / running mean loss`` row per batch to
          ``./save/log.txt``;
        * saves ``model.state_dict()`` under ``./save/model/`` at the end of
          every epoch.

    Args:
        model: Network to optimize. Every batch is moved to CUDA before the
            forward pass, so the model has to be usable with CUDA tensors.

    Returns:
        The same ``model`` object, after training.
    """
    import os  # function-scope import keeps this block self-contained

    num_epochs = 60
    start_epoch = 1
    savedir = './save'
    checkpoint_dir = './save/model/'
    automated_log_path = savedir + "/log.txt"
    modeltxtpath = savedir + "/model.txt"

    # NOTE(review): `train(...)` below has to resolve to the dataset factory,
    # but this function is itself named `train`. If both live in the same
    # module namespace this call hits the function and fails with a
    # TypeError -- confirm how the dataset is imported.
    loader = DataLoader(train(input_transform, target_transform),
                        num_workers=1,
                        batch_size=4,
                        shuffle=True)
    criterion = nn.CrossEntropyLoss()

    # Creating the checkpoint directory also creates its parent `savedir`,
    # so neither open() nor torch.save() fails on a fresh working directory.
    os.makedirs(checkpoint_dir, exist_ok=True)

    with open(modeltxtpath, "w") as myfile:
        myfile.write(str(model))

    optimizer = Adam(model.parameters(),
                     1e-4, (0.9, 0.999),
                     eps=1e-08,
                     weight_decay=2e-4)
    # Multiply the learning rate by 0.1 every 100 scheduler steps.
    lr_updater = lr_scheduler.StepLR(optimizer, 100, 0.1)

    for epoch in range(start_epoch, num_epochs + 1):
        print("----- TRAINING - EPOCH", epoch, "-----")

        for param_group in optimizer.param_groups:
            print("LEARNING RATE: ", param_group['lr'])

        model.train()
        running_loss = 0.0
        step = 0  # keeps the checkpoint name well-defined for an empty loader
        for step, (images, labels) in enumerate(loader):
            inputs = images.cuda()
            targets = labels.cuda()

            outputs = model(inputs)

            optimizer.zero_grad()
            # Only index 0 along dim 1 of the labels is used as the target.
            loss = criterion(outputs, targets[:, 0])
            loss.backward()
            optimizer.step()

            # Refresh the running mean on every step so that the value
            # written to the log is never stale.
            running_loss += loss.item()
            average = running_loss / (step + 1)

            if step % 100 == 0:
                print('epoch:%f' % epoch, 'step:%f' % step,
                      'loss:%f' % average)

            with open(automated_log_path, "a") as myfile:
                myfile.write("\n%d\t\t%d\t\t%.4f" % (epoch, step, average))

        # Advance the schedule only after this epoch's optimizer steps;
        # stepping the scheduler first skips the first value of the schedule
        # on PyTorch >= 1.1.
        lr_updater.step()

        filename = 'main-' + 'eda' + '-step-' + str(
            step) + '-epoch-' + str(epoch) + '.pth'
        torch.save(model.state_dict(), checkpoint_dir + filename)
    return model
Exemple #32
0
def main():
    """Build an EDANet with ``NUM_CLASSES`` outputs, move it to CUDA, train it."""
    net = EDANet(NUM_CLASSES).cuda()
    train(net)
Exemple #33
0
def mnist(learning_rate, initializer_mode, num_conv_layers, num_fc_layers):
    """Train a small TF-Slim CNN on MNIST and write TensorBoard summaries.

    Summaries go to ``logs/mnist/<hyperparameter string>``. Every 10 steps
    the merged summaries and the accuracy on the current training batch are
    logged, together with the accuracy on one batch of the test set.

    Args:
        learning_rate: Learning rate handed to the Adam optimizer.
        initializer_mode: ``0`` selects Xavier weight initialization; any
            other value selects a truncated normal with stddev 0.1. Biases
            are initialized to the constant 0.1 in both modes.
        num_conv_layers: Number of conv + max-pool blocks, 1 or 2.
        num_fc_layers: Number of fully connected layers, 1 or 2.

    Raises:
        ValueError: If ``num_conv_layers`` or ``num_fc_layers`` is not 1 or 2.
    """
    if num_conv_layers not in [1, 2]:
        raise ValueError("num_conv_layers should be 1 or 2")
    if num_fc_layers not in [1, 2]:
        raise ValueError("num_fc_layers should be 1 or 2")

    def make_hyperparameter_string(learning_rate, initializer_mode,
                                   num_conv_layers, num_fc_layers):
        """Encode the hyperparameters into a log directory name."""
        hyperparameter = "lr_%.e_" % learning_rate
        if initializer_mode == 0:
            hyperparameter += "xavier_constant"
        else:
            hyperparameter += "truncated_normal_constant"
        hyperparameter += "_%d_conv_%d_fc" % (num_conv_layers, num_fc_layers)
        return hyperparameter

    if initializer_mode == 0:
        weights_initializer = tf.contrib.layers.xavier_initializer()
    else:
        weights_initializer = tf.truncated_normal_initializer(stddev=0.1)
    biases_initializer = tf.constant_initializer(0.1)

    logdir = "logs/mnist/" + make_hyperparameter_string(
        learning_rate, initializer_mode, num_conv_layers, num_fc_layers)

    if not tf.gfile.Exists(logdir):
        tf.gfile.MakeDirs(logdir)

    def mnist_net(x):
        """Build the network on ``x``; return ``(logits, endpoints)``."""
        endpoints = {}
        with slim.arg_scope([slim.conv2d, slim.fully_connected],
                            activation_fn=tf.nn.relu,
                            weights_initializer=weights_initializer,
                            biases_initializer=biases_initializer):
            with slim.arg_scope([slim.conv2d, slim.max_pool2d],
                                stride=1,
                                padding="SAME"):
                net = slim.conv2d(x, 32, [5, 5], scope="conv1")
                net = slim.max_pool2d(net, [2, 2], stride=2, scope="pool1")
                endpoints["block1"] = net
                if num_conv_layers == 2:
                    net = slim.conv2d(net, 64, [5, 5], scope="conv2")
                    net = slim.max_pool2d(net, [2, 2], stride=2, scope="pool2")
                    endpoints["block2"] = net
                    net = tf.reshape(net, shape=[-1, 7 * 7 * 64])
                else:
                    # num_conv_layers was validated above, so this is 1.
                    net = tf.reshape(net, shape=[-1, 14 * 14 * 32])
                if num_fc_layers == 1:
                    logits = slim.fully_connected(net, 10, scope="fc")
                else:
                    logits = slim.stack(net,
                                        slim.fully_connected, [1024, 10],
                                        scope="fc")
                endpoints["logits"] = logits
        return logits, endpoints

    # Build everything in a fresh graph. Presumably this avoids
    # "ValueError: Variable conv1/weights already exists, disallowed" when
    # mnist() is called more than once in the same process.
    graph = tf.Graph()
    with graph.as_default():
        with tf.name_scope("input"):
            images = tf.placeholder(tf.float32,
                                    shape=[None, 784],
                                    name="images")
            images_3d = tf.reshape(images,
                                   shape=[-1, 28, 28, 1],
                                   name="images_3d")
            labels = tf.placeholder(tf.uint8, shape=[None], name="labels")
            onehot_labels = tf.one_hot(indices=labels,
                                       depth=10,
                                       name="onehot_labels")

        logits, endpoints = mnist_net(images_3d)

        with tf.name_scope("loss"):
            # The return value is not needed here; total_loss below is
            # gathered through tf.losses.get_total_loss().
            tf.losses.softmax_cross_entropy(onehot_labels=onehot_labels,
                                            logits=logits)
            # Do not call tf.losses.add_loss() on it as well: that makes
            # softmax_cross_entropy_loss show up twice in TensorBoard,
            # presumably because the call above already registered it once.
            total_loss = tf.losses.get_total_loss(
                add_regularization_losses=False)

        with tf.name_scope("accuracy"):
            correct_predictions = tf.equal(
                labels, tf.cast(tf.argmax(logits, axis=1), tf.uint8))
            accuracy = tf.reduce_mean(tf.cast(correct_predictions, tf.float32))

        with tf.name_scope("optimize"):
            optimizer = tf.train.AdamOptimizer(learning_rate)
            # create_train_op ensures that each time we ask for the loss, the update_ops
            # are run and the gradients being computed are applied too.
            train_op = slim.learning.create_train_op(total_loss, optimizer)

        # A batch size of 100 works much better than 30 and is also much
        # more stable.
        # NOTE(review): `num_epoch` is not defined in this function; it has
        # to exist as a module-level name -- confirm.
        train_set = dataset.train("MNIST-data").cache().shuffle(
            buffer_size=1000).batch(100).repeat(num_epoch)
        test_set = dataset.test("MNIST-data").cache().batch(30).repeat()

        iterator = train_set.make_one_shot_iterator()
        one_element = iterator.get_next()
        iterator_test = test_set.make_one_shot_iterator()
        one_element_test = iterator_test.get_next()

        init_op = tf.global_variables_initializer()

        summaries = set()
        for key in endpoints:
            summaries.add(tf.summary.histogram("block/" + key, endpoints[key]))
        for variable in slim.get_model_variables():
            summaries.add(tf.summary.histogram(variable.op.name, variable))
        for loss_tensor in tf.get_collection(tf.GraphKeys.LOSSES):
            summaries.add(
                tf.summary.scalar(loss_tensor.op.name, loss_tensor))
        # Train and test accuracy share one tensor; the two summaries differ
        # only in the batch that is fed when they are evaluated.
        accuracy_train_summary_op = tf.summary.scalar("accuracy_train",
                                                      accuracy)
        accuracy_test_summary_op = tf.summary.scalar("accuracy_test", accuracy)
        summaries.add(tf.summary.image("image", images_3d, 4))
        summary_op = tf.summary.merge(list(summaries), name='summary_op')

        step = 0
        log_writer = tf.summary.FileWriter(logdir)
        try:
            with tf.Session() as sess:
                log_writer.add_graph(sess.graph)
                sess.run(init_op)
                try:
                    # The train iterator is finite (repeat(num_epoch)), so
                    # the loop ends with OutOfRangeError.
                    while True:
                        images_, labels_ = sess.run(one_element)
                        train_feed = {images: images_, labels: labels_}
                        sess.run(train_op, feed_dict=train_feed)
                        if step % 10 == 0:
                            summary_, accuracy_train_summary = sess.run(
                                [summary_op, accuracy_train_summary_op],
                                feed_dict=train_feed)
                            images_, labels_ = sess.run(one_element_test)
                            accuracy_test_summary = sess.run(
                                accuracy_test_summary_op,
                                feed_dict={
                                    images: images_,
                                    labels: labels_
                                })
                            log_writer.add_summary(summary_, step)
                            log_writer.add_summary(accuracy_train_summary,
                                                   step)
                            log_writer.add_summary(accuracy_test_summary,
                                                   step)
                        step += 1
                except tf.errors.OutOfRangeError:
                    print("Finished")
        finally:
            # Close (and flush) the event file even if training fails.
            log_writer.close()
def grad(model, input):
    """Return the gradients of ``loss(model, input)`` w.r.t. ``model.variables``."""
    with tf.GradientTape() as recorder:
        objective = loss(model, input)
    targets = model.variables
    return recorder.gradient(objective, targets)

optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)

# Restore optimizer, model and global step from the newest checkpoint found
# in `checkpoint_dir`.
checkpoint_dir = 'checkpoint/'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
root = tfe.Checkpoint(optimizer=optimizer,
                      model=model,
                      optimizer_step=tf.train.get_or_create_global_step())
root.restore(tf.train.latest_checkpoint(checkpoint_dir))

number_of_test_images = 10
# NOTE(review): despite the name, this reads the *train* split
# (dataset.train) -- confirm whether dataset.test was intended.
test_data = dataset.train('./datasets')
test_data = test_data.batch(number_of_test_images)
iterator = test_data.make_one_shot_iterator()
next_element, _ = iterator.get_next()
# Reshape the batch to (samples, rows, cols, channels).
input = tf.reshape(next_element, [next_element.shape[0], 28, 28, 1])
print("Final loss: {:.3f}".format(loss(model, input)))
output = model(input)
input_images = input.numpy()
output_images = output.numpy()

# Make sure the target directory exists before any image is written.
output_dir = 'autoencoder_reconstructed_imgs/'
if not os.path.isdir(output_dir):
    os.makedirs(output_dir)

# Save each input next to the model output as one side-by-side PNG.
for i in range(input_images.shape[0]):
    input_image = np.squeeze(input_images[i, :, :, :])
    output_image = np.squeeze(output_images[i, :, :, :])
    print_image = np.hstack((input_image, output_image))
    file_name = output_dir + str(i) + '.png'
    # NOTE(review): scipy.misc.imsave was removed in SciPy 1.2; on newer
    # SciPy this call needs a replacement such as imageio.imwrite.
    scipy.misc.imsave(file_name, print_image)