def train():
  """Run the CIFAR-10 training loop for FLAGS.max_steps steps.

  Builds the training graph (distorted inputs -> inference -> loss ->
  train op) plus a second inference call on the evaluation inputs, then
  trains inside a plain tf.Session. The loss is printed and a checkpoint is
  written every 10 steps (and on the last step); summaries are written to
  FLAGS.train_dir every 100 steps.
  """
  with tf.Graph().as_default():
    step_counter = tf.Variable(0, trainable=False)

    # Input pipelines: augmented training batches and evaluation batches.
    train_images, train_labels = cifar10.distorted_inputs()
    eval_images, eval_labels = cifar10.inputs(eval_data=True)

    # Forward passes for the training and the evaluation inputs.
    train_logits = cifar10.inference(train_images)
    eval_predictions = cifar10.inference(eval_images, test=True)

    # Objective, and the op that performs one parameter update.
    total_loss = cifar10.loss(train_logits, train_labels)
    apply_step = cifar10.train(total_loss, step_counter)

    # The saver covers every variable that exists at this point.
    checkpoint_saver = tf.train.Saver(tf.all_variables())

    # Merge whatever summaries the calls above registered.
    merged_summaries = tf.merge_all_summaries()

    init_op = tf.initialize_all_variables()

    session_config = tf.ConfigProto(
        log_device_placement=FLAGS.log_device_placement)
    sess = tf.Session(config=session_config)
    sess.run(init_op)

    # The input queues have to be running before the first training step.
    tf.train.start_queue_runners(sess=sess)

    writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph)

    for step in xrange(FLAGS.max_steps):
      tick = time.time()
      _, current_loss = sess.run([apply_step, total_loss])
      duration = time.time() - tick  # measured but not reported anywhere

      every_tenth = step % 10 == 0

      if every_tenth:
        print('loss ' + str(current_loss))

      if step % 100 == 0:
        writer.add_summary(sess.run(merged_summaries), step)

      # Periodic checkpoint, plus one on the very last step.
      if every_tenth or (step + 1) == FLAGS.max_steps:
        target = os.path.join(FLAGS.train_dir, 'model.ckpt')
        checkpoint_saver.save(sess, target, global_step=step)

      if every_tenth:
        # NOTE(review): the return value is discarded and nothing is run in
        # the session here; confirm what cifar10.accuracy is expected to do
        # when called inside the loop.
        cifar10.accuracy(eval_predictions, eval_labels)
Exemple #2
0
def train():
  """Train CIFAR-10 and periodically log train/test accuracy.

  A single "accuracy" summary is fed from either the training-batch accuracy
  or the evaluation-batch accuracy, selected at run time through the boolean
  `train_flag` placeholder. Every FLAGS.log_frequency steps both variants are
  evaluated, written to separate event directories ('tmp/cifar10/train' and
  'tmp/cifar10/test') and printed together with the most recent loss.

  The loop ends when a session hook requests a stop (a StopAtStepHook with
  last_step=FLAGS.max_steps is installed). The monitored session and both
  summary writers are closed on exit, including when an exception escapes.
  """
  with tf.Graph().as_default():
    global_step = tf.train.get_or_create_global_step()

    # Chooses which accuracy tensor feeds the shared "accuracy" summary.
    train_flag = tf.placeholder(tf.bool, shape=())

    trX, trY = cifar10.distorted_inputs()
    teX, teY = cifar10.inputs(eval_data=True)

    # Training tower: logits, accuracy, loss and the update op.
    logits = cifar10.inference(trX)
    tr_acc_op = cifar10.accuracy(logits, trY)[1]
    print(tr_acc_op, "tr_acc\n")
    loss = cifar10.loss(logits, trY)
    train_op = cifar10.train(loss, global_step)

    # Evaluation tower shares the variables created above.
    tf.get_variable_scope().reuse_variables()
    eval_logits = cifar10.inference(teX)
    te_acc_op = cifar10.accuracy(eval_logits, teY)[1]
    print(te_acc_op, "te_acc\n")

    accuracy = tf.cond(train_flag, lambda: tr_acc_op, lambda: te_acc_op)
    tf.summary.scalar("accuracy", accuracy)

    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter('tmp/cifar10/train')
    test_writer = tf.summary.FileWriter('tmp/cifar10/test')

    print("Training Starts")

    # Configs
    config = tf.ConfigProto(log_device_placement=FLAGS.log_device_placement)
    config.gpu_options.allow_growth = True

    hooks = [tf.train.StopAtStepHook(last_step=FLAGS.max_steps),
             tf.train.NanTensorHook(loss)]
    format_str = ('%s: step %d, loss = %.2f, test accuracy = %.2f, '
                  'train accuracy = %.2f')

    try:
      # The context manager guarantees the session is closed on every path.
      with tf.train.MonitoredTrainingSession(
          hooks=hooks, config=config) as mon_sess:
        step = -1
        while not mon_sess.should_stop():
          step += 1
          _, loss_value = mon_sess.run([train_op, loss])
          if step % FLAGS.log_frequency != 0:
            continue
          # A further run() after a hook has requested stop raises
          # RuntimeError, so skip the logging runs on the final step.
          if mon_sess.should_stop():
            break

          tr_acc, summary = mon_sess.run([accuracy, merged],
                                         feed_dict={train_flag: True})
          train_writer.add_summary(summary, step)
          te_acc, summary = mon_sess.run([accuracy, merged],
                                         feed_dict={train_flag: False})
          test_writer.add_summary(summary, step)

          print (format_str % (datetime.now(), step, loss_value,
                               te_acc, tr_acc))
    finally:
      # Flush pending events to disk and release the file handles.
      train_writer.close()
      test_writer.close()
Exemple #3
0
def train():
    """Train CIFAR-10 until FLAGS.max_steps, logging batch accuracy.

    The graph is built from cifar10.distorted_inputs / inference / loss /
    accuracy / train, and executed in a MonitoredTrainingSession that
    checkpoints into FLAGS.train_dir. A logging hook prints the accuracy and
    throughput every FLAGS.log_frequency steps.
    """
    with tf.Graph().as_default():
        step_tensor = tf.contrib.framework.get_or_create_global_step()

        # Training batches.
        batch_images, batch_labels = cifar10.distorted_inputs()

        # Model output; this variant of inference also receives the labels.
        net_out = cifar10.inference(batch_images, batch_labels)

        total_loss = cifar10.loss(net_out, batch_labels)
        batch_accuracy = cifar10.accuracy(net_out, batch_labels)

        # One optimisation step per run of this op.
        optimize = cifar10.train(total_loss, step_tensor)

        class _LoggerHook(tf.train.SessionRunHook):
            """Logs accuracy and runtime."""

            def begin(self):
                self._step = -1
                self._start_time = time.time()

            def before_run(self, run_context):
                self._step += 1
                # Fetch the accuracy tensor alongside every run call.
                return tf.train.SessionRunArgs(batch_accuracy)

            def after_run(self, run_context, run_values):
                if self._step % FLAGS.log_frequency != 0:
                    return

                now = time.time()
                elapsed = now - self._start_time
                self._start_time = now

                accuracy_value = run_values.results
                examples_per_sec = (
                    FLAGS.log_frequency * FLAGS.batch_size / elapsed)
                sec_per_batch = float(elapsed / FLAGS.log_frequency)

                template = ('%s: step %d, accuracy = %.4f, '
                            '(%.1f examples/sec; %.3f sec/batch)')
                print(template % (datetime.now(), self._step, accuracy_value,
                                  examples_per_sec, sec_per_batch))

        session_hooks = [
            tf.train.StopAtStepHook(last_step=FLAGS.max_steps),
            tf.train.NanTensorHook(total_loss),
            _LoggerHook(),
        ]
        session_config = tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement)

        with tf.train.MonitoredTrainingSession(
                checkpoint_dir=FLAGS.train_dir,
                hooks=session_hooks,
                config=session_config) as mon_sess:
            while not mon_sess.should_stop():
                mon_sess.run(optimize)
Exemple #4
0
def trainning():
    """Train on batches fed through placeholders for MAX_STEPS steps.

    Loads the data via cifar10.load_data, converts both label arrays with
    cifar10.to_categorical(..., 10), and feeds batches of 96 examples to the
    graph. Loss and accuracy are printed every 100 completed steps.
    """
    (train_x, train_y), (test_x, test_y) = cifar10.load_data()
    train_y = cifar10.to_categorical(train_y, 10)
    test_y = cifar10.to_categorical(test_y, 10)
    data_set = cifar10.read_data_sets(train_x, train_y, test_x, test_y)

    # Graph inputs: rows of 32*32*3 values and rows of 10 label values.
    inputs = tf.placeholder("float", [None, 32 * 32 * 3])
    targets = tf.placeholder("float", [None, 10])

    net_out = cifar10.inference(inputs)
    objective = cifar10.loss(net_out, targets)
    update = cifar10.train_op(loss=objective, learning_rate=0.001)
    acc_op = cifar10.accuracy(net_out, targets)
    init_op = tf.initialize_all_variables()

    with tf.Session() as sess:
        sess.run(init_op)
        # `completed` counts finished steps, i.e. it runs 1..MAX_STEPS.
        for completed in range(1, MAX_STEPS + 1):
            xs, ys = data_set.train.next_batch(96)
            feed = {inputs: xs, targets: ys}
            _, loss_now, acc_now = sess.run([update, objective, acc_op],
                                            feed_dict=feed)
            if completed % 100 == 0:
                print("step: {:d} loss: {:f} acc: {:f}".format(
                    completed, loss_now, acc_now))
Exemple #5
0
def evaluate():
  """Build the evaluation graph for a two-input (paired image) model.

  Reads the dataset description from FLAGS.input_dir to obtain the image
  size, creates placeholders for a stacked pair of image batches and their
  labels, runs the shared-weight inference network on both halves, and sets
  up the accuracy op, a saver over the moving-average variables and the
  summary writer for FLAGS.eval_dir.
  """
  dataset = input_data.read(FLAGS.input_dir)
  image_size = dataset.image_size
  with tf.Graph().as_default():
    # Both image batches of a pair arrive stacked along the leading axis.
    eval_images = tf.placeholder(
        tf.float32,
        shape=(2, FLAGS.batch_size,
               image_size[0], image_size[1], image_size[2]))
    labels = tf.placeholder(tf.float32, shape=(FLAGS.batch_size))

    # Split the stacked placeholder into the two halves of the pair. The
    # original referenced an undefined name (`train_images`) here, which
    # raised NameError; `eval_images` is the placeholder defined above.
    images, images_p = tf.split(0, 2, eval_images)

    # Run the same network on both inputs, sharing variables between them.
    with tf.variable_scope('inference') as scope:
      logits = cifar10.inference(images)
      scope.reuse_variables()
      logits2 = cifar10.inference(images_p)

    # Calculate predictions.
    accuracy = cifar10.accuracy(logits, logits2, labels)

    # Restore the moving average version of the learned variables for eval.
    variable_averages = tf.train.ExponentialMovingAverage(
        cifar10.MOVING_AVERAGE_DECAY)
    variables_to_restore = variable_averages.variables_to_restore()
    saver = tf.train.Saver(variables_to_restore)

    # Build the summary operation based on the TF collection of Summaries.
    summary_op = tf.merge_all_summaries()

    graph_def = tf.get_default_graph().as_graph_def()
    summary_writer = tf.train.SummaryWriter(FLAGS.eval_dir,
                                            graph_def=graph_def)
Exemple #6
0
def train():
    """Train CIFAR-10 in a monitored session, logging loss and accuracy.

    Checkpoints go to the module-level `train_dir`; training stops at the
    module-level `max_step`. Loss and accuracy are printed every 10 steps.
    """
    with tf.Graph().as_default():
        step_tensor = tf.train.get_or_create_global_step()
        batch_images, batch_labels = cifar10.distorted_inputs()
        net_out = cifar10.inference(batch_images, train=True)
        total_loss = cifar10.loss(net_out, batch_labels)
        batch_accuracy = cifar10.accuracy(net_out, batch_labels)
        optimize = cifar10.train(total_loss, step_tensor)

        class _LoggerHook(tf.train.SessionRunHook):
            """Prints loss and accuracy every tenth run call."""

            def begin(self):
                self._step = -1

            def before_run(self, run_context):
                self._step += 1
                # Fetch both metrics together with each run call.
                return tf.train.SessionRunArgs([total_loss, batch_accuracy])

            def after_run(self, run_context, run_values):
                if self._step % 10 != 0:
                    return
                loss_value, acc_value = run_values.results
                template = 'step %d, loss = %.2f, accuracy = %.2f '
                print (template % (self._step, loss_value, acc_value))

        session_hooks = [
            tf.train.StopAtStepHook(last_step=max_step),
            tf.train.NanTensorHook(total_loss),
            _LoggerHook(),
        ]
        session_config = tf.ConfigProto(log_device_placement=False)

        with tf.train.MonitoredTrainingSession(
                checkpoint_dir=train_dir,
                hooks=session_hooks,
                config=session_config) as mon_sess:
            while not mon_sess.should_stop():
                mon_sess.run(optimize)
def evaluate():
    """Evaluate the model repeatedly, or once when FLAGS.run_once is set.

    Builds the evaluation graph on the split selected by FLAGS.eval_data,
    prepares a saver that restores the moving-average variables, and calls
    eval_once in a loop, sleeping FLAGS.eval_interval_secs between rounds.
    """
    with tf.Graph().as_default() as graph:
        # True selects the "test" split in cifar10.inputs.
        use_test_split = FLAGS.eval_data == "test"
        print(use_test_split)
        images, labels, ground_truth = cifar10.inputs(
            eval_data=use_test_split)

        # Forward pass; the second return value is not needed here.
        logits, _ = cifar10.inference(images)
        print(logits)
        print(logits.get_shape())
        print("after inference node creation")

        # The loss tensor itself is not used below, but the call is kept so
        # the graph contains the same ops as before.
        cifar10.loss(logits, labels)
        accuracy, precision, accuracies = cifar10.accuracy(
            logits, ground_truth)
        labels = tf.cast(labels, tf.int64)

        # Flattened views of labels and logits. They are only referenced by
        # the disabled top-k code, yet still created as graph ops.
        label_dims = labels.get_shape().as_list()
        flat_label_count = label_dims[0] * label_dims[1] * label_dims[2]
        tf.reshape(labels, [flat_label_count])
        logit_dims = logits.get_shape().as_list()
        flat_logit_count = logit_dims[0] * logit_dims[1] * logit_dims[2]
        tf.reshape(logits, [flat_logit_count, logit_dims[3]])

        # Disabled alternatives for computing predictions:
        # top_k_op = tf.nn.in_top_k(logits, labels, 1)
        # top_k_op = tf.nn.in_top_k(reshaped_logits, reshaped_labels, 1)

        # Evaluation restores the moving-average copies of the variables.
        averages = tf.train.ExponentialMovingAverage(
            cifar10.MOVING_AVERAGE_DECAY)
        saver = tf.train.Saver(averages.variables_to_restore())

        summary_op = tf.merge_all_summaries()
        summary_writer = tf.train.SummaryWriter(FLAGS.eval_dir, graph)

        while True:
            print("evaluate:")
            eval_once(saver, summary_writer, summary_op,
                      accuracy, precision, accuracies)
            if FLAGS.run_once:
                break
            time.sleep(FLAGS.eval_interval_secs)
def train():
  """Train a model for a number of steps.

  Builds the training graph, resumes from the newest checkpoint found in
  FLAGS.checkpoint_dir when there is one (otherwise initializes all
  variables), and then runs the training loop up to FLAGS.max_steps.

  Per step, the fetched precision and per-category accuracy values are
  printed. Every 10 steps a timing/loss/accuracy line is printed, every 100
  steps summaries are written to FLAGS.train_dir, and every 1000 steps (and
  on the final step) a checkpoint is saved under FLAGS.train_dir.

  Raises:
    AssertionError: if the fetched loss value is NaN.
  """
  with tf.Graph().as_default():
    global_step = tf.Variable(0, trainable=False)

    # Get images and labels for a segmentation model.
    images, labels, ground_truth = cifar10.distorted_inputs()
    tf.histogram_summary('label_hist/with_ignore', labels)
    tf.histogram_summary('label_hist/ground_truth', ground_truth)

    # Build a Graph that computes the logits predictions from the
    # inference model.
    print("before inference")
    print(images.get_shape())
    logits, nr_params = cifar10.inference(images)
    print("nr_params: "+str(nr_params) )
    print("after inference")
    # Calculate loss.
    loss = cifar10.loss(logits, labels)
    accuracy, precision, cat_accs = cifar10.accuracy(logits, ground_truth)

    # Build a Graph that trains the model with one batch of examples and
    # updates the model parameters.
    train_op = cifar10.train(loss, global_step)

    # Create a saver.
    saver = tf.train.Saver(tf.all_variables())
#    tf.image_summary('images2', images)
    print (logits)
#    tf.image_summary('predictions', logits)

    # Build the summary operation based on the TF collection of Summaries.
    summary_op = tf.merge_all_summaries()

    # Build an initialization operation to run below.
    init = tf.initialize_all_variables()

    # Start running operations on the Graph.
    sess = tf.Session(config=tf.ConfigProto(
        log_device_placement=FLAGS.log_device_placement))
    # NOTE(review): checkpoints are looked up in FLAGS.checkpoint_dir here but
    # saved under FLAGS.train_dir below -- confirm both flags point to the
    # same directory when resuming is expected to work.
    ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
    if ckpt and ckpt.model_checkpoint_path:
      # Restores from checkpoint
      saver.restore(sess, ckpt.model_checkpoint_path)
      # Assuming model_checkpoint_path looks something like:
      #   /my-favorite-path/cifar10_train/model.ckpt-0,
      # extract global_step from it.
      # From here on `global_step` is a plain Python int (the loop's starting
      # step), no longer the tf.Variable that was handed to cifar10.train.
      global_step = int(ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1])
    else:
      print('No checkpoint file found')
      print('Initializing new model')
      sess.run(init)
      global_step = 0


    # Start the queue runners.
    tf.train.start_queue_runners(sess=sess)

    summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph)

    # Resume counting at the restored step (0 for a fresh model).
    for step in xrange(global_step, FLAGS.max_steps):
      start_time = time.time()
      # One training step; the metrics are fetched in the same run call.
      _, loss_value, accuracy_value, precision_value, cat_accs_val  = sess.run([train_op,
                                                                                loss,
                                                                                accuracy,
                                                                                precision,
                                                                                cat_accs])

      duration = time.time() - start_time

      print (precision_value)
      print (cat_accs_val)

      assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

      #precision_value = [0 if np.isnan(p) else p for p in precision_value]
      #print (precision_value)
      if step % 10 == 0:
        num_examples_per_step = FLAGS.batch_size
        examples_per_sec = num_examples_per_step / duration
        sec_per_batch = float(duration)

        format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                      'sec/batch)\n Accuracy = %.4f, mean average precision = %.4f')
        print (format_str % (datetime.now(), step, loss_value,
                             examples_per_sec, sec_per_batch,
                             accuracy_value, np.mean(precision_value)))

      if step % 100 == 0:
        # This is a separate run call from the training step above.
        summary_str = sess.run(summary_op)
        summary_writer.add_summary(summary_str, step)

        # Hand-built summary carrying the raw values fetched with the
        # training step: overall accuracy plus one precision and one accuracy
        # entry per name in the module-level CLASSES sequence.
        summary = tf.Summary()
        summary.value.add(tag='Accuracy (raw)', simple_value=float(accuracy_value))
        for i,s in enumerate(CLASSES):
          summary.value.add(tag="precision/"+s+" (raw)",simple_value=float(precision_value[i]))
          summary.value.add(tag="accs/"+s+" (raw)",simple_value=float(cat_accs_val[i]))
#        summary.value.add(tag='Human precision (raw)', simple_value=float(precision_value))
        summary_writer.add_summary(summary, step)
        print("hundred steps")
      # Save the model checkpoint periodically.
      if step % 1000 == 0 or (step + 1) == FLAGS.max_steps:
        print("thousand steps")
        checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
        saver.save(sess, checkpoint_path, global_step=step)
def train(model_fn, train_folder, qn_id):
    """Train CIFAR-10 for a number of steps.

    Args:
        model_fn: callable applied to the image batch; its result is used as
            the logits for the loss and accuracy computations.
        train_folder: directory passed to MonitoredTrainingSession as
            `checkpoint_dir` and searched for the latest checkpoint to
            restore.
        qn_id: value printed as the prefix of every log line.

    Training runs until the local step counter kept by `_StopAtHook` reaches
    FLAGS.max_steps.
    """
    with tf.Graph().as_default():
        # Get images and labels for CIFAR-10.
        # Force input pipeline to CPU:0 to avoid operations sometimes ending up on
        # GPU and resulting in a slow down.
        with tf.device('/cpu:0'):
            images, labels = cifar10.distorted_inputs()

        # Build a Graph that computes the logits predictions from the
        # inference model.
        logits = model_fn(images)

        # Calculate loss.
        loss = cifar10.loss(logits, labels)

        # Calculate accuracy
        model_accuracy = cifar10.accuracy(logits, labels)

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        global_step = tf.train.get_or_create_global_step()
        train_op = cifar10.train(loss, model_accuracy, global_step)

        class _LoggerHook(tf.train.SessionRunHook):
            """Logs loss, accuracy and runtime."""
            def begin(self):
                self._start_time = time.time()

            def after_create_session(self, session, coord):
                # Start the local counter from the current global step value.
                self._step = session.run(global_step)

            def before_run(self, run_context):
                self._step += 1
                return tf.train.SessionRunArgs([loss, model_accuracy
                                                ])  # Asks for loss and accuracy values.

            def after_run(self, run_context, run_values):
                if self._step % FLAGS.log_frequency == 0:
                    current_time = time.time()
                    duration = current_time - self._start_time
                    self._start_time = current_time

                    # Results arrive in the order requested in before_run.
                    loss_value = run_values.results[0]
                    acc_value = run_values.results[1]
                    examples_per_sec = FLAGS.log_frequency * FLAGS.batch_size / duration
                    sec_per_batch = float(duration / FLAGS.log_frequency)

                    format_str = (
                        '%s - %s: step %d, loss = %.2f, acc = %.2f (%.1f examples/sec; %.3f '
                        'sec/batch)')
                    print(format_str %
                          (qn_id, datetime.now(), self._step, loss_value,
                           acc_value, examples_per_sec, sec_per_batch))

        class _StopAtHook(tf.train.SessionRunHook):
            """Requests a stop once a locally counted step reaches `last_step`.

            The counter starts at the global step value read when the session
            is created and is incremented once per run call; the global_step
            fetch requested in before_run is not read back in after_run.
            """
            def __init__(self, last_step):
                self._last_step = last_step

            def after_create_session(self, session, coord):
                self._step = session.run(global_step)

            def before_run(self, run_context):  # pylint: disable=unused-argument
                self._step += 1
                return tf.train.SessionRunArgs(global_step)

            def after_run(self, run_context, run_values):
                if self._step >= self._last_step:
                    run_context.request_stop()

        # class _StopAtHook(tf.train.StopAtStepHook):
        #     def __init__(self, last_step):
        #         super().__init__(last_step=last_step)
        #
        #     def begin(self):
        #         self._global_step_tensor = global_step
        #
        #     def before_run(self, run_context):  # pylint: disable=unused-argument
        #         return tf.train.SessionRunArgs(global_step)
        #
        #     def after_run(self, run_context, run_values):
        #         gs = run_values.results + 1
        #         print("\tgs = {}/{}".format(gs, self._last_step))
        #         if gs >= self._last_step:
        #             # Check latest global step to ensure that the targeted last step is
        #             # reached. global_step read tensor is the value of global step
        #             # before running the operation. We're not sure whether current session.run
        #             # incremented the global_step or not. Here we're checking it.
        #
        #             step = run_context.session.run(self._global_step_tensor)
        #             print("\t\tstep: {}. gs = {}/{}".format(step, gs, self._last_step))
        #             if step >= self._last_step:
        #                 run_context.request_stop()

        # Created before the monitored session is opened.
        saver = tf.train.Saver()
        with tf.train.MonitoredTrainingSession(
                checkpoint_dir=train_folder,
                hooks=[
                    _StopAtHook(last_step=FLAGS.max_steps),
                    tf.train.NanTensorHook(loss),
                    _LoggerHook()
                ],
                config=tf.ConfigProto(log_device_placement=FLAGS.
                                      log_device_placement)) as mon_sess:
            # NOTE(review): the session was already given
            # checkpoint_dir=train_folder; presumably this explicit restore is
            # redundant -- confirm. The hooks' step counters were read in
            # after_create_session, i.e. before this restore runs.
            latest_checkpoint_path = tf.train.latest_checkpoint(train_folder)
            if latest_checkpoint_path is not None:
                # Restore from checkpoint
                print("Restoring checkpoint from %s" % latest_checkpoint_path)
                saver.restore(mon_sess, latest_checkpoint_path)

            while not mon_sess.should_stop():
                mon_sess.run(train_op)
Exemple #10
0
def train():
  """Train CIFAR-10 with extra validation and test accuracy tensors.

  Three input pipelines are created on the CPU (training, a second
  distorted-input pipeline used as validation, and the evaluation set). Each
  is passed through cifar10.inference and cifar10.accuracy, and the three
  accuracy tensors are handed to cifar10.train together with the loss. The
  loop runs in a MonitoredTrainingSession until FLAGS.max_steps.
  """
  with tf.Graph().as_default():
    step_tensor = tf.train.get_or_create_global_step()

    # Keep the input pipelines on CPU:0 so their ops do not end up on the
    # GPU and slow things down.
    with tf.device('/cpu:0'):
      tr_images, tr_labels = cifar10.distorted_inputs()
      va_images, va_labels = cifar10.distorted_inputs()
      te_images, te_labels = cifar10.inputs(eval_data=True)

    # Training branch: logits, accuracy and loss.
    tr_logits = cifar10.inference(tr_images)
    tr_accuracy = cifar10.accuracy(tr_labels, tr_logits)
    total_loss = cifar10.loss(tr_logits, tr_labels)

    # Validation and test branches (variable reuse is left disabled, as in
    # the original: tf.get_variable_scope().reuse_variables()).
    va_logits = cifar10.inference(va_images)
    va_accuracy = cifar10.accuracy(va_labels, va_logits)
    te_logits = cifar10.inference(te_images)
    te_accuracy = cifar10.accuracy(te_labels, te_logits)

    # One optimisation step per run of this op.
    optimize = cifar10.train(total_loss, step_tensor,
                             tr_accuracy, va_accuracy, te_accuracy)

    class _LoggerHook(tf.train.SessionRunHook):
      """Prints the loss and throughput every FLAGS.log_frequency steps."""

      def begin(self):
        self._step = -1
        self._start_time = time.time()

      def before_run(self, run_context):
        self._step += 1
        # Fetch the loss together with every run call.
        return tf.train.SessionRunArgs(total_loss)

      def after_run(self, run_context, run_values):
        if self._step % FLAGS.log_frequency != 0:
          return

        now = time.time()
        elapsed = now - self._start_time
        self._start_time = now

        loss_value = run_values.results
        examples_per_sec = FLAGS.log_frequency * FLAGS.batch_size / elapsed
        sec_per_batch = float(elapsed / FLAGS.log_frequency)

        template = ('%s: step %d, loss = %.2f '
                    '(%.1f examples/sec; %.3f sec/batch)')
        print (template % (datetime.now(), self._step, loss_value,
                           examples_per_sec, sec_per_batch))

    session_hooks = [
        tf.train.StopAtStepHook(last_step=FLAGS.max_steps),
        tf.train.NanTensorHook(total_loss),
        _LoggerHook(),
    ]
    session_config = tf.ConfigProto(
        log_device_placement=FLAGS.log_device_placement)

    with tf.train.MonitoredTrainingSession(
        checkpoint_dir=FLAGS.train_dir,
        hooks=session_hooks,
        config=session_config) as mon_sess:
      while not mon_sess.should_stop():
        mon_sess.run(optimize)
Exemple #11
0
def train_rnn(train_loader,
              test_loader,
              lr,
              momentum,
              report=20,
              cell='RNN',
              hidden=64,
              logdir='results/rnn'):
    """Train an RNN classifier for one epoch on the GPU and log progress.

    Args:
        train_loader: iterable yielding (inputs, labels) training batches.
        test_loader: iterable of test batches; only its first batch is used
            for the periodic test-accuracy measurement.
        lr: learning rate for SGD.
        momentum: momentum for SGD.
        report: number of mini-batches between two log entries.
        cell: cell type forwarded to the RNN constructor.
        hidden: hidden size forwarded to the RNN constructor.
        logdir: directory the SummaryWriter writes to.

    Returns:
        The trained network.

    The summary writer is closed before returning (also when an exception
    escapes), so buffered events are flushed to disk.
    """
    ntrain = 1000  # per class
    nclass = 10  # number of classes
    imsize = 28
    # NOTE(review): batchsize and nsamples are only used to compute the
    # x-axis value of the logged scalars; they presumably mirror the loaders'
    # batch size and dataset size -- confirm against the callers.
    batchsize = 100
    nsamples = ntrain * nclass

    writer = SummaryWriter(logdir)
    try:
        net = RNN(cell, hidden)
        net.cuda()
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.SGD(net.parameters(), lr=lr, momentum=momentum)
        running_loss = 0.0

        # The fixed test batch is moved and reshaped once, not per report.
        test_xs, test_ys = next(iter(test_loader))
        test_xs = test_xs.cuda().view(-1, imsize, imsize).float()
        test_ys = test_ys.cuda()

        for epoch in range(1):
            for i, (batch_xs, batch_ys) in enumerate(train_loader):
                batch_xs = batch_xs.view(-1, imsize, imsize).cuda()
                batch_ys = batch_ys.cuda()

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward + backward + optimize
                outputs = net(batch_xs)
                loss = criterion(outputs, batch_ys)
                loss.backward()
                optimizer.step()
                running_loss += loss.item()

                if (i + 1) % report != 0:
                    continue

                # Report every `report` mini-batches.
                samples_seen = epoch * nsamples + (i + 1) * batchsize
                mean_loss = running_loss / report
                print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, mean_loss))
                writer.add_scalar('training_loss', mean_loss, samples_seen)
                running_loss = 0.0

                # Accuracy on the mini-batch that was just trained on.
                train_accuracy = accuracy(net, batch_xs.float(), batch_ys)
                print(f'train_accuracy:{train_accuracy:.3f}')
                writer.add_scalar('training_accuracy', train_accuracy,
                                  samples_seen)

                test_accuracy = accuracy(net, test_xs, test_ys)
                print(f'test_accuracy:{test_accuracy:.3f}')
                writer.add_scalar('test_accuracy', test_accuracy,
                                  samples_seen)
    finally:
        writer.close()

    return net
Exemple #12
0
def train():
    """Train CIFAR-10 for a number of steps with periodic validation.

    Builds a training tower (dropout=0.8) and a validation tower that reuses
    its variables (dropout=1, reuse=True) on cifar10.inputs(eval_data=False).
    Every 10 steps a loss/accuracy/throughput line is printed; every 100
    steps summaries are written and, together with the final step, a
    checkpoint is saved to FLAGS.train_dir and one validation batch is
    scored.

    Raises:
        AssertionError: if the fetched loss value is NaN.
    """
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)

        # Get images and labels for CIFAR-10.
        images, labels = cifar10.distorted_inputs()

        # Build a Graph that computes the logits predictions from the
        # inference model.
        logits = cifar10.inference(images, dropout=0.8, reuse=False)

        # Calculate loss.
        loss = cifar10.loss(logits, labels)

        # Calculate accuracy for the training batch.
        acc = cifar10.accuracy(logits, labels)

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        train_op = cifar10.train(loss, global_step)

        # Validation tower: eval_data=False selects the cross-validation
        # inputs rather than the test set; variables are shared with the
        # training tower via reuse=True.
        val_images, val_labels = cifar10.inputs(eval_data=False)
        val_logits = cifar10.inference(val_images, dropout=1, reuse=True)
        top_k_op = tf.nn.in_top_k(val_logits, val_labels, 1)

        # Create a saver.
        saver = tf.train.Saver(tf.all_variables())

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.merge_all_summaries()

        # Build an initialization operation to run below.
        init = tf.initialize_all_variables()

        # Start running operations on the Graph.
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement))
        sess.run(init)

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph)

        for step in xrange(FLAGS.max_steps):
            start_time = time.time()
            _, loss_value, acc_value = sess.run([train_op, loss, acc])
            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % 10 == 0:
                num_examples_per_step = FLAGS.batch_size
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)
                format_str = ('%s: step %d, loss = %.2f, acc = %.2f (%.1f examples/sec; %.3f '
                              'sec/batch)')
                print(format_str % (datetime.now(), step, loss_value, acc_value,
                                    examples_per_sec, sec_per_batch))

            if step % 100 == 0:
                summary_str = sess.run(summary_op)
                summary_writer.add_summary(summary_str, step)

            # Save the model checkpoint periodically.
            if step % 100 == 0 or (step + 1) == FLAGS.max_steps:
                checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)

                # Score one validation batch. The count of correct
                # predictions is converted to float before dividing so the
                # ratio is not truncated by integer division on Python 2.
                # NOTE(review): assumes the validation batch holds
                # FLAGS.batch_size examples -- confirm in cifar10.inputs.
                prediction = sess.run(top_k_op)
                precision = float(np.sum(prediction)) / FLAGS.batch_size
                print(("At step %d cross validation precision: %.3f") % (step, precision))
def train():
  """Train CIFAR-10 for a number of steps, validating as training proceeds.

  A training graph and a MonitoredTrainingSession are created first. A second
  graph for evaluation is then built inside the first graph's scope, and the
  training loop runs FLAGS.max_steps iterations, calling eval_once every 390
  iterations before the training step.
  """
  with tf.Graph().as_default():
    global_step = tf.train.get_or_create_global_step()
    # Get images and labels for CIFAR-10.
    # Force input pipeline to CPU:0 to avoid operations sometimes ending up on
    # GPU and resulting in a slow down.
    with tf.device('/cpu:0'):
      images, labels = cifar10.distorted_inputs()

    # Build a Graph that computes the logits predictions from the
    # inference model.
    logits = cifar10.inference(images)

    # NOTE(review): this tensor is not referenced again in this function.
    accuracy = cifar10.accuracy(logits, labels)

    # Calculate loss.
    loss = cifar10.loss(logits, labels)

    # Build a Graph that trains the model with one batch of examples and
    # updates the model parameters.
    train_op = cifar10.train(loss, global_step)

    class _LoggerHook(tf.train.SessionRunHook):
      """Logs loss and runtime."""

      def begin(self):
        self._step = -1
        self._start_time = time.time()

      def before_run(self, run_context):
        self._step += 1
        return tf.train.SessionRunArgs(loss)  # Asks for loss value.

      def after_run(self, run_context, run_values):
        if self._step % FLAGS.log_frequency == 0:
          current_time = time.time()
          duration = current_time - self._start_time
          self._start_time = current_time

          loss_value = run_values.results
          examples_per_sec = FLAGS.log_frequency * FLAGS.batch_size / duration
          sec_per_batch = float(duration / FLAGS.log_frequency)

          format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                        'sec/batch)')
          print (format_str % (datetime.now(), self._step, loss_value,
                               examples_per_sec, sec_per_batch))

    # The session is created directly rather than through a `with` block, so
    # it stays open for the loop at the bottom of this function.
    # NOTE(review): nothing in this function closes it afterwards.
    mon_sess = tf.train.MonitoredTrainingSession(
        checkpoint_dir=FLAGS.train_dir,
        hooks=[tf.train.StopAtStepHook(last_step=FLAGS.max_steps),
               tf.train.NanTensorHook(loss),
               _LoggerHook()], save_checkpoint_steps=200,
        config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement))    # as mon_sess:

    # Prepare an empty evaluation directory.
    cifar10.maybe_download_and_extract()
    if tf.gfile.Exists(FLAGS.eval_dir):
        tf.gfile.DeleteRecursively(FLAGS.eval_dir)
    tf.gfile.MakeDirs(FLAGS.eval_dir)

    """Validate CIFAR-10 for a number of steps"""
    # A second default graph holds the evaluation ops; `train_op` above still
    # belongs to the first graph and is run through `mon_sess`.
    with tf.Graph().as_default() as g:
        # Get images and labels for CIFAR-10.
        eval_data = FLAGS.eval_data == 'test'
        images, labels = cifar10.inputs(eval_data=eval_data, valData=True)
        print(images.shape)
        # Build a Graph that computes the logits predictions from the
        # inference model.
        logits = cifar10.inference(images)

        # Calculate predictions.
        top_k_op = tf.nn.in_top_k(logits, labels, 1)

        # Restore the moving average version of the learned variables for eval.
        variable_averages = tf.train.ExponentialMovingAverage(
                cifar10.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()

        summary_writer = tf.summary.FileWriter(FLAGS.eval_dir, g)
        #sess = tf.Session(graph=g)

        # Training loop: evaluate validation accuracy every 390 iterations
        # (the original comment describes this as almost an epoch).
        # NOTE(review): the loop does not check mon_sess.should_stop(), while
        # a StopAtStepHook is installed on the session -- confirm the two
        # agree when training resumes from a checkpoint.
        for i in range(FLAGS.max_steps):
            if i % 390 == 0:
                eval_once(saver, summary_writer, top_k_op, summary_op)
            mon_sess.run(train_op)