Example No. 1
def evaluate():
  """Eval CIFAR-10 for a number of steps."""
  dataset = input_data.read(FLAGS.input_dir)
  image_size = dataset.image_size
  with tf.Graph().as_default():
    # Build a Graph that computes the logits predictions from the
    # inference model.
    eval_images = tf.placeholder(
        tf.float32,
        shape=(2, FLAGS.batch_size, image_size[0], image_size[1],
               image_size[2]))
    labels = tf.placeholder(tf.float32, shape=(FLAGS.batch_size))

    images, images_p = tf.split(0, 2, eval_images)

    with tf.variable_scope('inference') as scope:
      logits = cifar10.inference(images)
      scope.reuse_variables()
      logits2 = cifar10.inference(images_p)

    # Calculate predictions.
    accuracy = cifar10.accuracy(logits, logits2, labels)
    # Restore the moving average version of the learned variables for eval.
    variable_averages = tf.train.ExponentialMovingAverage(
        cifar10.MOVING_AVERAGE_DECAY)
    variables_to_restore = variable_averages.variables_to_restore()
    saver = tf.train.Saver(variables_to_restore)

    # Build the summary operation based on the TF collection of Summaries.
    summary_op = tf.merge_all_summaries()

    graph_def = tf.get_default_graph().as_graph_def()
    summary_writer = tf.train.SummaryWriter(FLAGS.eval_dir,
                                            graph_def=graph_def)
def train():
  """Train CIFAR-10 for a number of steps."""
  with tf.Graph().as_default():
    global_step = tf.Variable(0, trainable=False)

    # Get images and labels for CIFAR-10.
    images, labels = cifar10.distorted_inputs()

    testImg, testlabels = cifar10.inputs(eval_data=True)
    # Build a Graph that computes the logits predictions from the
    # inference model.
    logits = cifar10.inference(images)
    test_pre = cifar10.inference(testImg, test=True)
    # Build the test-accuracy op once here, not inside the training loop.
    test_accuracy = cifar10.accuracy(test_pre, testlabels)

    # Calculate loss.
    loss = cifar10.loss(logits, labels)

    # Build a Graph that trains the model with one batch of examples and
    # updates the model parameters.
    train_op = cifar10.train(loss, global_step)

    # Create a saver.
    saver = tf.train.Saver(tf.all_variables())

    # Build the summary operation based on the TF collection of Summaries.
    summary_op = tf.merge_all_summaries()

    # Build an initialization operation to run below.
    init = tf.initialize_all_variables()

    # Start running operations on the Graph.
    sess = tf.Session(config=tf.ConfigProto(
        log_device_placement=FLAGS.log_device_placement))
    sess.run(init)

    # Start the queue runners.
    tf.train.start_queue_runners(sess=sess)

    summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph)

    for step in xrange(FLAGS.max_steps):
      start_time = time.time()
      _, loss_value = sess.run([train_op, loss])
      duration = time.time() - start_time

      if step % 10 == 0:
        print('loss ' + str(loss_value))

      if step % 100 == 0:
        summary_str = sess.run(summary_op)
        summary_writer.add_summary(summary_str, step)

      # Save the model checkpoint periodically.
      if step % 10 == 0 or (step + 1) == FLAGS.max_steps:
        checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
        saver.save(sess, checkpoint_path, global_step=step)

      # Evaluate periodically, running the accuracy op built above.
      if step % 10 == 0:
        print('test accuracy ' + str(sess.run(test_accuracy)))
def eval_once(filename):
  """Eval CIFAR-10 for a number of steps."""
  with tf.Graph().as_default() as g:
    # Get loss.
    # Create placeholder.
    
    images = tf.placeholder(tf.float32, shape=(FLAGS.batch_size, IMAGE_SIZE, IMAGE_SIZE, 3))
    labels = tf.placeholder(tf.int32, shape = (FLAGS.batch_size,))
    domain_labels = tf.placeholder(tf.int32, shape = (FLAGS.batch_size,))

    #loss_label, loss_domain = cifar10.inference(images) 

    
    logits1, logits2= cifar10.inference(images)
    loss_label = cifar10.loss_without_decay(logits1, labels)
    loss_domain = cifar10.loss_without_decay(logits2, domain_labels)
    

    # Restore the moving average version of the learned variables for eval.
    variable_averages = tf.train.ExponentialMovingAverage(cifar10.MOVING_AVERAGE_DECAY)
    variables_to_restore = variable_averages.variables_to_restore()

    saver = tf.train.Saver(variables_to_restore)
  
    x_tr, y_tr, d_tr, nl_x_v, nl_y_v, nl_d_v, l_x_te, l_y_te, l_d_te = input.input()

    # Calculate losses of training, non-lifelog validation, and lifelog test.
    y_tr_loss, d_tr_loss = eval_a_dataset(saver, filename, x_tr, y_tr, d_tr, images, labels, domain_labels, loss_label, loss_domain)
    y_nl_loss, d_nl_loss = eval_a_dataset(saver, filename, nl_x_v, nl_y_v, nl_d_v, images, labels, domain_labels, loss_label, loss_domain)
    y_l_loss, d_l_loss = eval_a_dataset(saver, filename, l_x_te, l_y_te, l_d_te, images, labels, domain_labels, loss_label, loss_domain)


  return y_tr_loss, d_tr_loss, y_nl_loss, d_nl_loss, y_l_loss, d_l_loss
def test_once(filename):
  """Eval CIFAR-10 for a number of steps."""
  with tf.Graph().as_default() as g:
    # Get loss.
    # Create placeholder.
    
    images = tf.placeholder(tf.float32, shape=(FLAGS.batch_size, IMAGE_SIZE, IMAGE_SIZE, 3))

    loss_label, loss_domain = cifar10.inference(images)
    

    # Restore the moving average version of the learned variables for eval.
    variable_averages = tf.train.ExponentialMovingAverage(cifar10.MOVING_AVERAGE_DECAY)
    variables_to_restore = variable_averages.variables_to_restore()

    saver = tf.train.Saver(variables_to_restore)
   
    _, _, _, _, _, _, \
            x, y, domain= input.input()

    # Pad the number of examples up to a multiple of the batch size.
    num_of_examples = np.shape(x)[0]

    remainder = (FLAGS.batch_size - num_of_examples % FLAGS.batch_size) % FLAGS.batch_size
    index = range(num_of_examples) + [0] * remainder

    with tf.Session() as sess:
       # Need to modify for test.
       saver.restore(sess, filename) 
       global_step = int(filename.split('-')[1])


       # Allocate results in a list.
       losses_label = []
       losses_domain = []

       # Start the queue runners.
       step = 0
       while step + FLAGS.batch_size <= len(index):
           
           label_loss_value, domain_loss_value = sess.run(
               [loss_label, loss_domain],
               feed_dict={images: x[index[step:step + FLAGS.batch_size], :]})

           losses_label.append(label_loss_value)
           losses_domain.append(domain_loss_value)
           step = step + FLAGS.batch_size


       # Convert list of lists to numpy array.
       losses_label = np.asarray(losses_label)
       losses_domain = np.asarray(losses_domain)

       losses_label = losses_label.reshape((-1, 21))
       losses_domain = losses_domain.reshape((-1, 2))

       losses_label = losses_label[:num_of_examples, :]
       losses_domain = losses_domain[:num_of_examples, :]

  sp.savez('test.npz', losses_label = losses_label, losses_domain = losses_domain, y = y, domain = domain)

  return losses_label, losses_domain, y, domain
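The batch-padding trick above generalizes to any batch size; a minimal standalone sketch with hypothetical sizes (the evaluation itself is elided):

num_of_examples, batch_size = 103, 25
# Extend the index list to a multiple of the batch size with repeats of
# index 0, evaluate in full batches, then drop the padded rows.
remainder = (batch_size - num_of_examples % batch_size) % batch_size
index = list(range(num_of_examples)) + [0] * remainder
assert len(index) % batch_size == 0   # 125 here
# After evaluation: results = results[:num_of_examples]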
def tower_loss(scope, images, labels):
    # Build inference Graph.
    logits = cifar10.inference(images, None, None, None, None, None, None,
                               None, train=True)

    # Build the portion of the Graph calculating the losses. Note that we will
    # assemble the total_loss using a custom function below.
    _ = cifar10.loss(logits, labels)

    # Assemble all of the losses for the current tower only.
    losses = tf.get_collection('losses', scope)

    # Calculate the total loss for the current tower.
    total_loss = tf.add_n(losses, name='total_loss')

    # Attach a scalar summary to all individual losses and the total loss; do the
    # same for the averaged version of the losses.
    for l in losses + [total_loss]:
        # Remove 'tower_[0-9]/' from the name in case this is a multi-GPU training
        # session. This helps the clarity of presentation on tensorboard.
        loss_name = re.sub('%s_[0-9]*/' % cifar10.TOWER_NAME, '', l.op.name)
        tf.summary.scalar(loss_name, l)

    return total_loss
Example No. 6
def tower_loss(scope):
  """Calculate the total loss on a single tower running the CIFAR model.
  Args:
    scope: unique prefix string identifying the CIFAR tower, e.g. 'tower_0'
  Returns:
     Tensor of shape [] containing the total loss for a batch of data
  """
  # Get images and labels for CIFAR-10.
  images, labels = cifar10.distorted_inputs()
  # Build inference Graph.
  logits = cifar10.inference(images)
  # Build the portion of the Graph calculating the losses. Note that we will
  # assemble the total_loss using a custom function below.
  _ = cifar10.loss(logits, labels)
  # Assemble all of the losses for the current tower only.
  losses = tf.get_collection('losses', scope)
  # Calculate the total loss for the current tower.
  total_loss = tf.add_n(losses, name='total_loss')
  # Compute the moving average of all individual losses and the total loss.
  loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
  loss_averages_op = loss_averages.apply(losses + [total_loss])
  # Attach a scalar summary to all individual losses and the total loss; do the
  # same for the averaged version of the losses.
  for l in losses + [total_loss]:
    # Remove 'tower_[0-9]/' from the name in case this is a multi-GPU training
    # session. This helps the clarity of presentation on tensorboard.
    loss_name = re.sub('%s_[0-9]*/' % cifar10.TOWER_NAME, '', l.op.name)
    # Name each loss as '(raw)' and name the moving average version of the loss
    # as the original loss name.
    tf.scalar_summary(loss_name +' (raw)', l)
    tf.scalar_summary(loss_name, loss_averages.average(l))
  with tf.control_dependencies([loss_averages_op]):
    total_loss = tf.identity(total_loss)
  return total_loss
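The closing control_dependencies block above is what ties the moving-average update to the returned loss: fetching the result forces the update op to run first. A minimal TF 1.x sketch of the same pattern (the variable x is illustrative):

import tensorflow as tf

x = tf.Variable(3.0)
ema = tf.train.ExponentialMovingAverage(0.9)
ema_op = ema.apply([x])
with tf.control_dependencies([ema_op]):
    y = tf.identity(x)  # fetching y now also runs ema_op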
Example No. 7
def evaluate():
    """Eval CIFAR-10 for a number of steps."""
    with tf.Graph().as_default() as g:
        # Get images and labels for CIFAR-10.
        eval_data = FLAGS.eval_data == 'test'
        images, _ = cifar10.inputs(eval_data=eval_data)
        logits = cifar10.inference(images)
        prediction = tf.nn.softmax(logits)

        variable_averages = tf.train.ExponentialMovingAverage(
            cifar10.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        with tf.Session() as sess:
            ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
            if ckpt and ckpt.model_checkpoint_path:
                # Restores from checkpoint
                saver.restore(sess, ckpt.model_checkpoint_path)
            else:
                print('No checkpoint file found')
                return

            # Run the graph to compute the predicted classes from the
            # inference model.
            result = sess.run(tf.argmax(prediction, 1))
            print(result)
            print('DONE')
Example No. 8
def tower_loss(scope):
    images, labels = cifar10.distorted_inputs()
    logits = cifar10.inference(images)
    _ = cifar10.loss(logits, labels)
    losses = tf.get_collection('losses', scope)
    total_loss = tf.add_n(losses, name='total_loss')
    return total_loss
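tf.add_n, used by every tower_loss variant here, sums a list of tensors elementwise; a one-line TF 1.x check:

import tensorflow as tf

with tf.Session() as sess:
    print(sess.run(tf.add_n([tf.constant(1.0), tf.constant(2.5)])))  # 3.5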
def main(_):
    with tf.Graph().as_default():
        global_step = tf.contrib.framework.get_or_create_global_step()

        images, labels = cifar10.distorted_inputs()

        logits = cifar10.inference(images)

        loss = cifar10.loss(logits, labels)

        train_op = cifar10.train(loss, global_step)

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord)

            summary = tf.summary.merge_all()
            writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph)

            print('Training started')
            for i in range(FLAGS.max_step):
                start_time = time.time()
                _, loss_value = sess.run([train_op, loss])
                duration = time.time() - start_time

                if i % 100 == 0:
                    print('Step %6d: loss = %.2f (%.3f sec)' %
                          (i, loss_value, duration))
                    summary_str = sess.run(summary)
                    writer.add_summary(summary_str, i)
                    writer.flush()

            coord.request_stop()
            coord.join(threads)
Example No. 10
def tower_loss(scope, images, labels):
  """Calculate the total loss on a single tower running the CIFAR model.

  Args:
    scope: unique prefix string identifying the CIFAR tower, e.g. 'tower_0'
    images: Images. 4D tensor of shape [batch_size, height, width, 3].
    labels: Labels. 1D tensor of shape [batch_size].

  Returns:
     Tensor of shape [] containing the total loss for a batch of data
  """

  # Build inference Graph.
  logits = cifar10.inference(images)

  # Build the portion of the Graph calculating the losses. Note that we will
  # assemble the total_loss using a custom function below.
  _ = cifar10.loss(logits, labels)

  # Assemble all of the losses for the current tower only.
  losses = tf.get_collection('losses', scope)

  # Calculate the total loss for the current tower.
  total_loss = tf.add_n(losses, name='total_loss')

  # Attach a scalar summary to all individual losses and the total loss; do the
  # same for the averaged version of the losses.
  for l in losses + [total_loss]:
    # Remove 'tower_[0-9]/' from the name in case this is a multi-GPU training
    # session. This helps the clarity of presentation on tensorboard.
    loss_name = re.sub('%s_[0-9]*/' % cifar10.TOWER_NAME, '', l.op.name)
    tf.summary.scalar(loss_name, l)

  return total_loss
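The re.sub call above strips the per-tower prefix from op names so TensorBoard tags stay readable across GPUs; a quick standalone check, assuming cifar10.TOWER_NAME is 'tower':

import re

TOWER_NAME = 'tower'  # assumed value of cifar10.TOWER_NAME
name = 'tower_0/conv2/cross_entropy'
print(re.sub('%s_[0-9]*/' % TOWER_NAME, '', name))  # conv2/cross_entropy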
Example No. 11
def evaluate():
  """Eval CIFAR-10 for a number of steps."""
  with tf.Graph().as_default() as g:
    # Get images and labels for CIFAR-10.
    eval_data = FLAGS.eval_data == 'test'
    images, labels = cifar10.inputs(eval_data=eval_data)

    # Build a Graph that computes the logits predictions from the
    # inference model.
    logits = cifar10.inference(images)

    # Calculate predictions.
    top_k_op = tf.nn.in_top_k(logits, labels, 1)

    # Restore the moving average version of the learned variables for eval.
    variable_averages = tf.train.ExponentialMovingAverage(
        cifar10.MOVING_AVERAGE_DECAY)
    variables_to_restore = variable_averages.variables_to_restore()
    saver = tf.train.Saver(variables_to_restore)

    # Build the summary operation based on the TF collection of Summaries.
    summary_op = tf.summary.merge_all()

    summary_writer = tf.summary.FileWriter(FLAGS.eval_dir, g)

    while True:
      eval_once(saver, summary_writer, top_k_op, summary_op)
      if FLAGS.run_once:
        break
      time.sleep(FLAGS.eval_interval_secs)
Example No. 12
def train():
    with tf.Graph().as_default():
        # get global step
        global_step = tf.train.get_or_create_global_step()
        # get data through cpu
        with tf.device('/cpu:0'):
            images, labels = cifar10.distorted_inputs()
        # get loss and logit
        # logits = cifar10.inference(images=images, r=low_ranks)
        logits = cifar10.inference(images=images, r=low_ranks)
        loss = cifar10.loss(logits=logits, labels=labels)
        # set train_op
        train_op = cifar10.train(loss, global_step)
        for v in tf.trainable_variables():
            print(v)
        nonzero = tf.count_nonzero(tf.get_collection('sparse_components')[-1])

        # define a LoggerHook to log something
        # clean_list = tf.get_collection('sparse_components')
        # clean_list = clean_s(clean_list)
        # clean_op = [c.op for c in clean_list]

        class _LoggerHook(tf.train.SessionRunHook):
            """
            log session and runtime info
            """
            def begin(self):
                self._step = -1
                self._start_time = time.time()

            def before_run(self, run_context):
                self._step += 1
                return tf.train.SessionRunArgs(loss)

            def after_run(self, run_context, run_values):
                if self._step % FLAGS.log_frequency == 0:
                    current_time = time.time()
                    duration = current_time - self._start_time
                    self._start_time = current_time
                    loss_value = run_values.results
                    example_per_sec = FLAGS.log_frequency * FLAGS.batch_size / duration
                    sec_per_batch = float(duration / FLAGS.log_frequency)

                    format_str = (
                        '%s: step %d, loss = %.6f (%.1f examples/sec; '
                        '%.3f sec/batch)')
                    print(format_str % (datetime.now(), self._step, loss_value,
                                        example_per_sec, sec_per_batch))

        with tf.train.MonitoredTrainingSession(
                checkpoint_dir=FLAGS.train_dir,
                hooks=[
                    tf.train.StopAtStepHook(last_step=FLAGS.max_steps),
                    tf.train.NanTensorHook(loss),
                    _LoggerHook()
                ],
                config=tf.ConfigProto(log_device_placement=FLAGS.
                                      log_device_placement)) as mon_sess:
            while not mon_sess.should_stop():
                mon_sess.run(train_op)
Example No. 13
def evaluate():
    """Evaluate CIFAR-10 repeatedly."""
    with tf.Graph().as_default() as g:
        # Read images and labels from CIFAR-10.
        eval_data = (FLAGS.eval_data == 'test')
        images, labels = cifar10.inputs(eval_data=eval_data)

        # Build a Graph that computes the logits from the inference model.
        logits = cifar10.inference(images)

        # Calculate accuracy.
        top_k_op = tf.nn.in_top_k(logits, labels, 1)

        # Restore the trained model parameters.
        variable_averages = tf.train.ExponentialMovingAverage(
            cifar10.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        # Build the summary operation from the TF collection of Summaries.
        summary_op = tf.summary.merge_all()

        summary_writer = tf.summary.FileWriter(FLAGS.eval_dir, g)

        # FLAGS.run_once is treated here as the number of extra rounds.
        run_time = FLAGS.run_once
        while True:
            eval_once(saver, summary_writer, top_k_op, summary_op)
            if run_time <= 0:
                break
            run_time -= 1
            time.sleep(FLAGS.eval_interval_secs)
Example No. 14
def train():
    with tf.Graph().as_default():
        images, labels = cifar10.distorted_inputs()

        logits = cifar10.inference(images)

        loss = cifar10.loss(logits, labels)

        global_step = tf.Variable(0, trainable=False)
        train_op = cifar10.train(loss, global_step=global_step)

        summary_op = tf.merge_all_summaries()

        init = tf.global_variables_initializer()

        sess = tf.Session()
        sess.run(init)

        tf.train.start_queue_runners(sess=sess)
        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph)

        trainable_var = tf.trainable_variables()
        print([v.name for v in trainable_var])
        var = trainable_var[0]
        res = sess.run(var)
        print(type(res))
Example No. 15
def train():
    with tf.Graph().as_default():
      global_step = tf.train.get_or_create_global_step()
      images, labels = cifar10.distorted_inputs()
      logits = cifar10.inference(images, train = True)
      loss = cifar10.loss(logits, labels)
      accuracy = cifar10.accuracy(logits, labels)
      train_op = cifar10.train(loss, global_step)

      class _LoggerHook(tf.train.SessionRunHook):

        def begin(self):
          self._step = -1

        def before_run(self, run_context):
          self._step += 1
          return tf.train.SessionRunArgs([loss, accuracy])

        def after_run(self, run_context, run_values):
          if self._step % 10 == 0:
            loss_value, acc_value = run_values.results
            format_str = ('step %d, loss = %.2f, accuracy = %.2f ')
            print (format_str %(self._step, loss_value, acc_value))

      with tf.train.MonitoredTrainingSession(
          checkpoint_dir=train_dir,
          hooks=[tf.train.StopAtStepHook(last_step=max_step),
               tf.train.NanTensorHook(loss),
               _LoggerHook()],
          config=tf.ConfigProto(
            log_device_placement=False)) as mon_sess:
          while not mon_sess.should_stop():
            mon_sess.run(train_op)
Example No. 16
def tower_loss(scope):
    """Calculate the total loss on a single tower running the CIFAR model.
    Args:
      scope: unique prefix string identifying the CIFAR tower, e.g. 'tower_0'
    Returns:
       Tensor of shape [] containing the total loss for a batch of data
    """
    # Get images and labels for CIFAR-10.
    images, labels = cifar10.distorted_inputs()
    # Build inference Graph.
    logits = cifar10.inference(images)
    # Build the portion of the Graph calculating the losses. Note that we will
    # assemble the total_loss using a custom function below.
    _ = cifar10.loss(logits, labels)
    # Assemble all of the losses for the current tower only.
    losses = tf.get_collection('losses', scope)
    # Calculate the total loss for the current tower.
    total_loss = tf.add_n(losses, name='total_loss')
    # Compute the moving average of all individual losses and the total loss.
    # loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
    # loss_averages_op = loss_averages.apply(losses + [total_loss])
    # Attach a scalar summary to all individual losses and the total loss; do the
    # same for the averaged version of the losses.
    # for l in losses + [total_loss]:
    # Remove 'tower_[0-9]/' from the name in case this is a multi-GPU training
    # session. This helps the clarity of presentation on tensorboard.
    # loss_name = re.sub('%s_[0-9]*/' % cifar10.TOWER_NAME, '', l.op.name)
    # Name each loss as '(raw)' and name the moving average version of the loss
    # as the original loss name.
    # tf.scalar_summary(loss_name +' (raw)', l)
    # tf.scalar_summary(loss_name, loss_averages.average(l))
    # with tf.control_dependencies([loss_averages_op]):
    # total_loss = tf.identity(total_loss)
    return total_loss
Example No. 17
def tower_loss(scope):
    """Calculate the total loss on a single tower running the CIFAR model.

    Args:
      scope: unique prefix string identifying the CIFAR tower, e.g. 'tower_0'

    Returns:
       Tensor of shape [] containing the total loss for a batch of data
    """
    # Get images and labels for CIFAR-10.
    images, labels = cifar10.distorted_inputs()

    # Build inference Graph.
    logits = cifar10.inference(images)

    # Build the portion of the Graph calculating the losses. Note that we will
    # assemble the total_loss using a custom function below.
    _ = cifar10.loss(logits, labels)

    # Assemble all of the losses for the current tower only.
    losses = tf.get_collection('losses', scope)

    # Calculate the total loss for the current tower.
    total_loss = tf.add_n(losses, name='total_loss')

    # Attach a scalar summary to all individual losses and the total loss; do the
    # same for the averaged version of the losses.
    for l in losses + [total_loss]:
        # Remove 'tower_[0-9]/' from the name in case this is a multi-GPU training
        # session. This helps the clarity of presentation on tensorboard.
        loss_name = re.sub('%s_[0-9]*/' % cifar10.TOWER_NAME, '', l.op.name)
        tf.summary.scalar(loss_name, l)

    return total_loss
Example No. 18
def train():
  """Train CIFAR-10 for a number of steps."""
  with tf.Graph().as_default():

    global_step = tf.Variable(0, trainable=False)
    images, labels = cifar10.distorted_inputs()
    logits = cifar10.inference(images)
    loss = cifar10.loss(logits, labels)
    # loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits, labels))
    # train_op = tf.train.GradientDescentOptimizer(1e-2).minimize(loss)
    train_op = cifar10.train(loss, global_step)
    top_k_op = tf.nn.in_top_k(logits, labels, 1)

    saver = tf.train.Saver(tf.all_variables())
    init = tf.initialize_all_variables()
    sess = tf.Session(config=tf.ConfigProto(
        log_device_placement=FLAGS.log_device_placement))
    sess.run(init)
    tf.train.start_queue_runners(sess=sess)

    true_count = 0
    for step in xrange(FLAGS.max_steps):
      start_time = time.time()
      _, loss_value, precisions = sess.run([train_op, loss, top_k_op])

      true_count += np.sum(precisions)

      if step % 10 == 0:
        checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
        saver.save(sess, checkpoint_path, global_step=step)
        duration = time.time() - start_time
        print(' step %d, loss = %.3f, acc = %.3f, dur = %.2f' %
              (step, loss_value,
               true_count / float(FLAGS.batch_size * 10), duration))
        true_count = 0
Example No. 19
def evaluate():
    """Eval CIFAR-10 for a number of steps."""
    with tf.Graph().as_default() as g:
        # Get images and labels for CIFAR-10.
        eval_data = FLAGS.eval_data == 'test'
        images, labels = cifar10.inputs(eval_data=eval_data)
        tf.summary.image('images', images)

        # Build a Graph that computes the logits predictions from the
        # inference model.
        logits = cifar10.inference(images)

        # Calculate predictions.
        top_k_op = tf.nn.in_top_k(logits, labels, 1)

        # Restore the moving average version of the learned variables for eval.
        variable_averages = tf.train.ExponentialMovingAverage(
            cifar10.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()

        summary_writer = tf.summary.FileWriter(FLAGS.eval_dir, g)

        while True:
            eval_once(saver, summary_writer, top_k_op, summary_op)
            if FLAGS.run_once:
                break
            time.sleep(FLAGS.eval_interval_secs)
Example No. 20
def evaluate():
  """Eval CIFAR-10 for a number of steps."""
  with tf.Graph().as_default() as g:
    # Get images and labels for CIFAR-10.
    # eval_data = FLAGS.eval_data == 'test'
    # images, labels = cifar10.inputs(eval_data=eval_data)
    f = tf.read_file('/images/input.jpg')
    image = tf.image.decode_jpeg(f, channels=3)
    image = tf.image.resize_images(image, (24, 24))

    tf.summary.image('images', [image])

    # Build a Graph that computes the logits predictions from the
    # inference model.
    logits = cifar10.inference([image])

    # Calculate predictions.
    # top_k_op = tf.nn.in_top_k(logits, labels, 1)

    # Restore the moving average version of the learned variables for eval.
    variable_averages = tf.train.ExponentialMovingAverage(
        cifar10.MOVING_AVERAGE_DECAY)
    variables_to_restore = variable_averages.variables_to_restore()
    saver = tf.train.Saver(variables_to_restore)

    # Build the summary operation based on the TF collection of Summaries.
    summary_op = tf.summary.merge_all()

    summary_writer = tf.summary.FileWriter(FLAGS.eval_dir, g)

    eval_once(saver, summary_writer, logits, summary_op)
Example No. 21
def train():
    """Train the cifar-10 for a number of steps."""
    with tf.Graph().as_default():
        global_step = tf.train.get_or_create_global_step()

        # Get images and labels for CIFAR-10.
        # Force the input pipeline to CPU:0 to avoid operation sometimes ended up on
        # GPU and resulting in a slow down.
        with tf.device('/cpu:0'):
            images, labels = cifar10.distorted_inputs()

            # Build a Graph that computes the logits predictions from the
            # inference model

        logits = cifar10.inference(images)
        # calculate the loss.
        loss = cifar10.loss(logits, labels)

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameter
        train_op = cifar10.train(loss, global_step)

        class _LoggerHook(tf.train.SessionRunHook):
            """Logs loss and runtime."""
            def begin(self):
                self._step = -1
                self._start_time = time.time()

            def before_run(self, run_context):
                self._step += 1
                return tf.train.SessionRunArgs(
                    loss)  # asks for the loss value.

            def after_run(self, run_context, run_values):
                if self._step % FLAGS.log_frequency == 0:
                    current_time = time.time()
                    duration = current_time - self._start_time
                    self._start_time = current_time

                    loss_value = run_values.results
                    examples_per_sec = FLAGS.log_frequency * FLAGS.batch_size / duration
                    sec_per_batch = float(duration / FLAGS.log_frequency)

                    format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; '
                                  '%.3f sec/batch)')
                    print(format_str % (datetime.now(), self._step, loss_value,
                                        examples_per_sec, sec_per_batch))

        with tf.train.MonitoredTrainingSession(
                checkpoint_dir=FLAGS.train_dir,
                hooks=[
                    tf.train.StopAtStepHook(last_step=FLAGS.max_steps),
                    tf.train.NanTensorHook(loss),
                    _LoggerHook()
                ],
                config=tf.ConfigProto(log_device_placement=FLAGS.
                                      log_device_placement)) as mon_sess:
            while not mon_sess.should_stop():
                mon_sess.run(train_op)
def evaluate():
    """
    Evaluate cifar10 for a number of steps
    """
    with tf.Graph().as_default() as g:
        # get images and labels for cifar-10
        eval_data = FLAGS.eval_data == "test"
        images, labels = cifar10.inputs(eval_data=eval_data)

        # build a tf.Graph that computes logits from inference model
        logits = cifar10.inference(images)

        # calculate prediction
        top_k_op = tf.nn.in_top_k(logits, labels, 1)

        # restore moving average version of the learned variables for eval
        variable_averages = tf.train.ExponentialMovingAverage(
            cifar10.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        # build the summary operation based on the tf collection of summaries
        summary_op = tf.summary.merge_all()

        summary_writer = tf.summary.FileWriter(FLAGS.eval_dir, g)

        while True:
            eval_once(saver, summary_writer, top_k_op, summary_op)
            if FLAGS.run_once:
                break
            time.sleep(FLAGS.eval_interval_secs)
def evaluate():
    """Eval CIFAR-10 for a number of steps."""
    with tf.Graph().as_default() as g:
        with tf.device('/cpu:0'):
            images, labels = cifar10.distorted_inputs()

        # Build a Graph that computes the logits predictions from the
        # inference model.
        logits = cifar10.inference(images)

        # Calculate predictions.
        top_k_op = tf.nn.in_top_k(logits, labels, 1)

        # Restore the moving average version of the learned variables for eval.
        variable_averages = tf.train.ExponentialMovingAverage(
            cifar10.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()

        summary_writer = tf.summary.FileWriter(FLAGS.train_dir, g)

        eval_once(saver, summary_writer, top_k_op, summary_op)
Example No. 24
def evaluate_image(filename):
    with tf.Graph().as_default() as g:
        # Number of images to process
        FLAGS.batch_size = 1

        image = img_read(filename)
        logit = cifar10.inference(image)

        output = tf.nn.softmax(logit)
        top_k_pred = tf.nn.top_k(output, k=1)

        variable_averages = tf.train.ExponentialMovingAverage(
            cifar10.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        with tf.Session() as sess:
            #sess.run(tf.global_variables_initializer())
            ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
            if ckpt and ckpt.model_checkpoint_path:
                # Restores from checkpoint
                saver.restore(sess, ckpt.model_checkpoint_path)
            else:
                print("No checkpoint file found")
                return

            values, indices = sess.run(top_k_pred)
            print LABEL_NAMES[indices[0][0] - 1], values[0][0]
Example No. 25
def evaluate():
  """Eval CIFAR-10 for a number of steps."""
  with tf.Graph().as_default() as g:
    # Get images and labels for CIFAR-10.
    
    inputs = cifar10.ram_inputs(unit_variance=True, is_train=False)
    images = inputs['images']
    labels = inputs['labels']

    # Build a Graph that computes the logits predictions from the
    # inference model.
    logits = cifar10.inference(images, 3, use_batchnorm=True, 
        use_nrelu=False, id_decay=False, add_shortcuts=True, is_train=False)

    # Calculate predictions.
    top_k_op = tf.nn.in_top_k(logits, labels, 1)

    # Restore the moving average version of the learned variables for eval.
    variable_averages = tf.train.ExponentialMovingAverage(
        cifar10.MOVING_AVERAGE_DECAY)
    variables_to_restore = variable_averages.variables_to_restore()
    saver = tf.train.Saver(variables_to_restore)

    # Build the summary operation based on the TF collection of Summaries.
    summary_op = tf.merge_all_summaries()

    summary_writer = tf.train.SummaryWriter(FLAGS.eval_dir, g)

    while True:
      eval_once(saver, summary_writer, top_k_op, summary_op, inputs)
      if FLAGS.run_once:
        break
      time.sleep(FLAGS.eval_interval_secs)
Example No. 26
def evaluate():
  """Eval CIFAR-10 for a number of steps."""
  with tf.Graph().as_default() as g:
    # Get images and labels for CIFAR-10.
    images, labels = cifar10.inputs(eval_data='test')
    # Need to modify for test.
    logits, _ = cifar10.inference(images)

    # Restore the moving average version of the learned variables for eval.
    variable_averages = tf.train.ExponentialMovingAverage(
        cifar10.MOVING_AVERAGE_DECAY)
    variables_to_restore = variable_averages.variables_to_restore()
    del variables_to_restore['input_producer/limit_epochs/epochs']
    saver = tf.train.Saver(variables_to_restore)
    
    # Build the summary operation based on the TF collection of Summaries.
    summary_op = tf.merge_all_summaries()

    summary_writer = tf.train.SummaryWriter(FLAGS.test_dir, g)

    while True:
      eval_once_given_model(saver, summary_writer, logits, labels, summary_op, images)
      if FLAGS.run_once:
        break
      time.sleep(FLAGS.eval_interval_secs)
Example No. 27
def evaluate():
  """Eval CIFAR-10 for a number of steps."""
  with tf.Graph().as_default() as g:
    # Get images and labels for CIFAR-10.
    eval_data = FLAGS.eval_data == 'test'
    images, labels = cifar10.inputs(eval_data=eval_data)  # Generate a batch of images and labels

    # Build a Graph that computes the logits predictions from the
    # inference model.
    logits = cifar10.inference(images)

    # Calculate predictions. With k = 1, this outputs a batch_size bool
    # array; entry out[i] is True if the target class for example i is
    # among the top k predictions.
    top_k_op = tf.nn.in_top_k(logits, labels, 1)

    # Restore the moving average version of the learned variables for eval.
    variable_averages = tf.train.ExponentialMovingAverage(
        cifar10.MOVING_AVERAGE_DECAY)
    variables_to_restore = variable_averages.variables_to_restore() # Returns a map of names to Variables to restore.
    saver = tf.train.Saver(variables_to_restore)                    # The Saver class adds ops to save and restore variables to and from checkpoints

    # Build the summary operation based on the TF collection of Summaries.
    summary_op = tf.summary.merge_all()   # Merges all summaries collected in the default graph.

    summary_writer = tf.summary.FileWriter(FLAGS.eval_dir, g)   # create an event file in a given directory and add summaries and events to it

    while True:
      eval_once(saver, summary_writer, top_k_op, summary_op)
      if FLAGS.run_once:
        break
      time.sleep(FLAGS.eval_interval_secs) # Suspend execution of the calling thread for the given number of seconds
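For k = 1, the in_top_k op used above essentially reduces to an argmax comparison (ties aside); a small NumPy sketch makes the semantics concrete:

import numpy as np

logits = np.array([[0.8, 0.6, 0.3],
                   [0.1, 0.6, 0.4]])
labels = np.array([0, 0])
print(logits.argmax(axis=1) == labels)  # [ True False]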
Example No. 28
def train():
    """Train CIFAR-10 for a number of steps."""
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)

        images, labels = cifar10.distorted_inputs()

        # Build a Graph that computes the logits predictions from the
        # inference model.
        logits = cifar10.inference(images)

        # Calculate loss.
        loss = cifar10.loss(logits, labels)

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        train_op = cifar10.train(loss, global_step)

        # Create a saver.
        saver = tf.train.Saver(tf.all_variables())

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.merge_all_summaries()

        # Build an initialization operation to run below.
        init = tf.initialize_all_variables()

        # Start running operations on the Graph.
        sess = tf.Session(config=tf.ConfigProto(log_device_placement=FLAGS.log_device_placement))
        sess.run(init)

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph)

        for step in xrange(FLAGS.max_steps):
            start_time = time.time()
            _, loss_value = sess.run([train_op, loss])
            duration = time.time() - start_time

            assert not np.isnan(loss_value), "Model diverged with loss = NaN"

            if step % 10 == 0:
                num_examples_per_step = FLAGS.batch_size
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)

                format_str = "%s: step %d, loss = %.2f (%.1f examples/sec; %.3f " "sec/batch)"
                print(format_str % (datetime.now(), step, loss_value, examples_per_sec, sec_per_batch))

            if step % 100 == 0:
                summary_str = sess.run(summary_op)
                summary_writer.add_summary(summary_str, step)

            # Save the model checkpoint periodically.
            if step % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                checkpoint_path = os.path.join(FLAGS.train_dir, "model.ckpt")
                saver.save(sess, checkpoint_path, global_step=step)
def train():
    """Train CIFAR-10 for a number of steps."""
    with tf.Graph().as_default():
        global_step = tf.contrib.framework.get_or_create_global_step()

        # Get images and labels for CIFAR-10.
        images, labels = cifar10.distorted_inputs()

        # Build a Graph that computes the logits predictions from the
        # inference model.
        logits = cifar10.inference(images)

        # Calculate loss.
        loss = cifar10.loss(logits, labels)

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        train_op = cifar10.train(loss, global_step)

        class _LoggerHook(tf.train.SessionRunHook):
            """Logs loss and runtime."""
            def begin(self):
                self._step = -1

            def before_run(self, run_context):
                self._step += 1
                self._start_time = time.time()
                return tf.train.SessionRunArgs(loss)  # Asks for loss value.

            def after_run(self, run_context, run_values):
                duration = time.time() - self._start_time
                loss_value = run_values.results
                if self._step % 10 == 0:
                    num_examples_per_step = FLAGS.batch_size
                    examples_per_sec = num_examples_per_step / duration
                    sec_per_batch = float(duration)

                    format_str = (
                        '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                        'sec/batch)')
                    print(format_str % (datetime.now(), self._step, loss_value,
                                        examples_per_sec, sec_per_batch))
                    global step_no
                    step_no = self._step

        with tf.train.MonitoredTrainingSession(
                checkpoint_dir=FLAGS.train_dir,
                hooks=[
                    tf.train.StopAtStepHook(last_step=FLAGS.max_steps),
                    tf.train.NanTensorHook(loss),
                    _LoggerHook()
                ],
                config=tf.ConfigProto(
                    log_device_placement=FLAGS.log_device_placement,
                    inter_op_parallelism_threads=4,
                    intra_op_parallelism_threads=0)) as mon_sess:
            while not mon_sess.should_stop():
                mon_sess.run(train_op)
                """run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
def train():
    """ 多步训练CIFAR-10数据集 """
    with tf.Graph().as_default():
        global_step = tf.contrib.framework.get_or_create_global_step()

        # Pin the data distortion/preprocessing to CPU:0.
        with tf.device('/cpu:0'):
            images, labels = cifar10.distorted_inputs()

        # Forward inference, defined in cifar10.py.
        logits = cifar10.inference(images)

        # loss.
        loss = cifar10.loss(logits, labels)

        # train
        train_op = cifar10.train(loss, global_step)

        class _LoggerHook(tf.train.SessionRunHook):
            """记录 loss 和 runtime."""
            def __init__(self):
                self._start_time = time.time()
                self._step = -1

            def begin(self):
                pass

            def before_run(self, run_context):
                self._step += 1
                return tf.train.SessionRunArgs(
                    loss)  # Add the loss op to the Session.run() call.

            def after_run(self, run_context, run_values):
                if self._step % FLAGS.log_frequency == 0:
                    current_time = time.time()
                    duration = current_time - self._start_time
                    self._start_time = current_time

                    loss_value = run_values.results
                    examples_per_sec = FLAGS.log_frequency * FLAGS.batch_size / duration
                    sec_per_batch = float(duration / FLAGS.log_frequency)

                    format_str = '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f ' 'sec/batch)'
                    print(format_str % (datetime.now(), self._step, loss_value,
                                        examples_per_sec, sec_per_batch))

        with tf.train.MonitoredTrainingSession(
                checkpoint_dir=FLAGS.train_dir,
                hooks=[
                    tf.train.StopAtStepHook(
                        last_step=FLAGS.max_steps),  # Stop after the given number of steps.
                    tf.train.NanTensorHook(loss),  # Watch the loss; stop training if it becomes NaN.
                    _LoggerHook()
                ],
                # log_device_placement: whether to log device placement.
                config=tf.ConfigProto(log_device_placement=FLAGS.
                                      log_device_placement)) as mon_sess:
            while not mon_sess.should_stop():
                mon_sess.run(train_op)
def train():
    """Train CIFAR-10 for a number of steps."""

    g1 = tf.Graph()
    with g1.as_default():
        global_step = tf.contrib.framework.get_or_create_global_step()

        # Get images and labels for CIFAR-10.
        images, labels = cifar10.distorted_inputs()

        # Build a Graph that computes the logits predictions from the
        # inference model.
        logits = cifar10.inference(images)

        # Calculate loss.
        loss = cifar10.loss(logits, labels)
        grads = cifar10.train_part1(loss, global_step)

        only_gradients = [g for g, _ in grads]
        only_vars = [v for _, v in grads]
        placeholder_gradients = []

        #with tf.device("/gpu:0"):
        for grad_var in grads:
            placeholder_gradients.append(
                (tf.placeholder('float',
                                shape=grad_var[0].get_shape()), grad_var[1]))

        feed_dict = {}

        for i, grad_var in enumerate(grads):
            feed_dict[placeholder_gradients[i][0]] = np.zeros(
                placeholder_gradients[i][0].shape)

        train_op = cifar10.train_part2(global_step, placeholder_gradients)

        sess = tf.Session()
        sess.run(tf.global_variables_initializer())
        feeds = []
        print("Reached here")
        for i, grad_var in enumerate(grads):
            feeds.append(placeholder_gradients[i][0])
        # Partial Run
        print("Reached here", len(feeds))
        for x in feeds:
            print(x, )
        h = sess.partial_run_setup([only_gradients, train_op], feeds)
        print("Reached here")

        for _ in xrange(10):
            res_grads = sess.partial_run(h,
                                         only_gradients,
                                         feed_dict=feed_dict)

            feed_dict = {}
            for i, grad_var in enumerate(res_grads):
                feed_dict[placeholder_gradients[i][0]] = res_grads[i]

            res_train_op = sess.partial_run(h, train_op, feed_dict=feed_dict)
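partial_run splits one Session.run into stages that share intermediate state, which is what lets the gradients above be fetched, modified, and fed back within a single logical step. A minimal TF 1.x sketch of the setup/step pattern:

import tensorflow as tf

a = tf.placeholder(tf.float32, shape=[])
b = tf.placeholder(tf.float32, shape=[])
r1 = a + b
r2 = r1 * 2.0

with tf.Session() as sess:
    # Declare up front every fetch and feed the staged run will use.
    h = sess.partial_run_setup([r1, r2], [a, b])
    v1 = sess.partial_run(h, r1, feed_dict={a: 1.0, b: 2.0})  # 3.0
    v2 = sess.partial_run(h, r2)                              # 6.0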
Example No. 32
def evaluate():
    """Eval CIFAR-10 for a number of steps."""
    dt = Transformer.Transformer()
    dt.checkpoint_version = FLAGS.checkpoint_version
    dt.trainable = False

    #with dt.graph.as_default() as g:
    with tf.Graph().as_default() as g:
        if FLAGS.is_compressed:
            dt.load_flow('/tmp/cifar10_train_' +
                         str(dt.checkpoint_version - 1) +
                         '/pruned_flow_graph.pkl')  # prefetch flow graph
            dt.flow = dt.cached_flow
        else:
            dt.flow = NeuralFlow()

        with g.device('/cpu:0'):
            # Get images and labels for CIFAR-10.
            eval_data = FLAGS.eval_data == 'test'
            images, labels = cifar10.inputs(eval_data=eval_data)

        # Build a Graph that computes the logits predictions from the
        # inference model.
        logits = cifar10.inference(dt,
                                   images,
                                   is_training=False,
                                   is_compressed=FLAGS.is_compressed)

        total_params, _ = slim.model_analyzer.analyze_vars(
            slim.get_model_variables())
        print('total params: %d' % total_params)

        # Calculate predictions.
        top_k_op = tf.nn.in_top_k(logits, labels, 1)

        # Calculate loss.
        loss_op = cifar10.loss(dt, logits, labels)

        # Restore the moving average version of the learned variables for eval.
        variable_averages = tf.train.ExponentialMovingAverage(
            cifar10.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        variables_to_restore_simple = tf.global_variables()

        if FLAGS.is_compressed:
            saver = tf.train.Saver(variables_to_restore_simple)
        else:
            saver = tf.train.Saver(variables_to_restore)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()

        summary_writer = tf.summary.FileWriter(FLAGS.eval_dir, g)

        while True:
            eval_once(dt, saver, summary_writer, top_k_op, loss_op, summary_op)
            if FLAGS.run_once:
                break
            time.sleep(FLAGS.eval_interval_secs)
Example No. 33
def evaluate():
    """Eval CIFAR-10 for a number of steps."""
    with tf.Graph().as_default() as g:
        # Get images and labels for CIFAR-10.
        eval_data = FLAGS.eval_data == 'test'
        images, labels = cifar10.inputs(eval_data=eval_data)

        # Build a Graph that computes the logits predictions from the
        # inference model.
        logits = cifar10.inference(images)

        # Calculate predictions.
        top_k_op = tf.nn.in_top_k(logits, labels, 1)
        ###
        # tf.nn.in_top_k checks whether the predicted result matches the
        # actual label, returning a bool tensor.
        # tf.nn.in_top_k(prediction, target, K):
        # prediction is the predicted result, of shape
        # [num_samples, num_classes] and dtype tf.float32 or similar.
        # target holds the true class label of each sample, one per sample.
        # K asks whether target is among the top K predictions for each
        # sample; K is usually 1.
        #
        # import tensorflow as tf;
        # A = [[0.8,0.6,0.3], [0.1,0.6,0.4]]
        # B = [1, 1]
        # out = tf.nn.in_top_k(A, B, 1)
        # with tf.Session() as sess:
        #     sess.run(tf.initialize_all_variables())
        #     print sess.run(out)
        #
        # Running result: [False, True]  (note the zero-based indexing)
        ###

        # Restore the moving average version of the learned variables for eval.
        variable_averages = tf.train.ExponentialMovingAverage(
            cifar10.MOVING_AVERAGE_DECAY)
        ###
        # tf.train.ExponentialMovingAverage updates parameters with a moving
        # average. It is initialized with a decay rate that controls how fast
        # the model updates, and it maintains a shadow variable (the averaged
        # parameter value), initialized to the variable's initial value and
        # updated as:
        # shadow_variable = decay * shadow_variable + (1 - decay) * variable
        # where variable is the value being tracked and decay is the decay
        # rate, usually close to 1 (0.99, 0.999). The larger the decay, the
        # slower the parameters move and the more stable the model.
        ###
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()

        summary_writer = tf.summary.FileWriter(FLAGS.eval_dir, g)

        while True:
            eval_once(saver, summary_writer, top_k_op, summary_op)
            if FLAGS.run_once:
                break
            # Sleep for a while before evaluating again.
            time.sleep(FLAGS.eval_interval_secs)
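A numeric version of the shadow-variable update described in the comments above, with decay = 0.9 and a made-up value sequence:

decay = 0.9
shadow = 5.0                 # initialized to the variable's first value
for value in [5.0, 4.0, 3.0]:
    shadow = decay * shadow + (1 - decay) * value
    print(round(shadow, 3))  # 5.0, then 4.9, then 4.71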
Example No. 34
def build_model_func():

    images, labels = cifar10.distorted_inputs()

    logits = cifar10.inference(images)

    loss = cifar10.loss(logits, labels)

    return loss
Example No. 35
def evaluate():
  """Eval CIFAR-10 for a number of steps."""
  with tf.Graph().as_default():
    # Get images and labels for CIFAR-10.
    eval_data = FLAGS.eval_data == 'test'
    images, labels = inputs() # cifar10.inputs(eval_data=eval_data)
    print "images dimension @ evaluation:", images
    print "image label @ evaluation:", labels



    # Build a Graph that computes the logits predictions from the
    # inference model.
    logits = cifar10.inference(images)



    # Calculate predictions.
    top_k_op = tf.nn.in_top_k(logits, labels, 1)
    top_k_predict_op = tf.argmax(logits, 1)

    # Restore the moving average version of the learned variables for eval.
    variable_averages = tf.train.ExponentialMovingAverage(
        cifar10.MOVING_AVERAGE_DECAY)
    variables_to_restore = {}
    for v in tf.all_variables():
      if v in tf.trainable_variables():
        restore_name = variable_averages.average_name(v)
      else:
        restore_name = v.op.name
      variables_to_restore[restore_name] = v
    saver = tf.train.Saver(variables_to_restore)

    # Build the summary operation based on the TF collection of Summaries.
    summary_op = tf.merge_all_summaries()

    graph_def = tf.get_default_graph().as_graph_def()
    summary_writer = tf.train.SummaryWriter(FLAGS.eval_dir,
                                            graph_def=graph_def)

    # test_image(saver, logits)

    ### Test one image
    print "#################"
    print "## gonna test this:"
    print "#################"
    ###

    while True:
      eval_once(saver, summary_writer, top_k_op, top_k_predict_op, summary_op, images)
      if FLAGS.run_once:
        break
      time.sleep(FLAGS.eval_interval_secs)
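The manual restore map above sends trainable variables to their moving-average names and everything else to its own name; a minimal TF 1.x check of what average_name produces:

import tensorflow as tf

v = tf.Variable(1.0, name='weights')
ema = tf.train.ExponentialMovingAverage(0.999)
print(ema.average_name(v))  # weights/ExponentialMovingAverage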
Example No. 36
def train():
  """Train CIFAR-10 for a number of steps."""
  with tf.Graph().as_default():
    global_step = tf.train.get_or_create_global_step()

    # Get images and labels for CIFAR-10.
    # Force input pipeline to CPU:0 to avoid operations sometimes ending up on
    # GPU and resulting in a slow down.
    with tf.device('/cpu:0'):
      images, labels = cifar10.distorted_inputs()

    # Build a Graph that computes the logits predictions from the
    # inference model.
    logits = cifar10.inference(images)

    # Calculate loss.
    loss = cifar10.loss(logits, labels)

    # Build a Graph that trains the model with one batch of examples and
    # updates the model parameters.
    train_op = cifar10.train(loss, global_step)

    class _LoggerHook(tf.train.SessionRunHook):
      """Logs loss and runtime."""

      def begin(self):
        self._step = -1
        self._start_time = time.time()

      def before_run(self, run_context):
        self._step += 1
        return tf.train.SessionRunArgs(loss)  # Asks for loss value.

      def after_run(self, run_context, run_values):
        if self._step % FLAGS.log_frequency == 0:
          current_time = time.time()
          duration = current_time - self._start_time
          self._start_time = current_time

          loss_value = run_values.results
          examples_per_sec = FLAGS.log_frequency * FLAGS.batch_size / duration
          sec_per_batch = float(duration / FLAGS.log_frequency)

          format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                        'sec/batch)')
          print (format_str % (datetime.now(), self._step, loss_value,
                               examples_per_sec, sec_per_batch))

    with tf.train.MonitoredTrainingSession(
        checkpoint_dir=FLAGS.train_dir,
        hooks=[tf.train.StopAtStepHook(last_step=FLAGS.max_steps),
               tf.train.NanTensorHook(loss),
               _LoggerHook()],
        config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement)) as mon_sess:
      while not mon_sess.should_stop():
        mon_sess.run(train_op)
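A worked example of the throughput math inside _LoggerHook, with assumed values for log_frequency, batch_size, and the measured duration:

log_frequency, batch_size, duration = 10, 128, 0.8
print(log_frequency * batch_size / duration)  # 1600.0 examples/sec
print(duration / log_frequency)               # 0.08 sec/batch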
Example No. 37
def train():
    """ Train the CIFAR10 model for a number of steps"""
    global_step = tf.train.get_or_create_global_step()

    # Get images and labels for the cifar10
    data_dir = os.path.join(FLAGS.data_dir, 'cifar-10-batches-bin')
    images_batch, labels_batch = cifar10_input.train_inputs(
        data_dir, FLAGS.batch_size)

    # Build a Graph that computes the logits predictions from the
    # inference model
    logits = cifar10.inference(images_batch)

    # Compute loss
    loss = cifar10.loss(logits, labels_batch)

    # Build a Graph that trains the model with one batch of examples and
    # updates the model parameters
    train_op = cifar10.train(loss, global_step)

    class _LoggerHook(tf.train.SessionRunHook):
        """Logs loss and runtime"""
        def begin(self):
            self._step = -1
            self._start_time = time.time()

        def before_run(self, run_context):
            self._step += 1
            return tf.train.SessionRunArgs(loss)  # Asks for loss value

        def after_run(self, run_context, run_values):
            if self._step % FLAGS.log_frequency == 0:
                current_time = time.time()
                duration = current_time - self._start_time
                self._start_time = current_time

                loss_value = run_values.results
                examples_per_sec = FLAGS.log_frequency * FLAGS.batch_size / duration
                sec_per_batch = float(duration / FLAGS.log_frequency)

                format_str = (
                    '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                    'sec/batch)')
                print(format_str % (datetime.now(), self._step, loss_value,
                                    examples_per_sec, sec_per_batch))

    with tf.train.MonitoredTrainingSession(
            checkpoint_dir=FLAGS.checkpoint_dir,
            hooks=[
                tf.train.StopAtStepHook(last_step=FLAGS.max_steps),
                tf.train.NanTensorHook(loss),
                _LoggerHook()
            ],
            config=tf.ConfigProto(
                log_device_placement=FLAGS.log_device_placement)) as mon_sess:
        while not mon_sess.should_stop():
            mon_sess.run(train_op)
def main():
    # Get images and labels for CIFAR-10.
    eval_data = FLAGS.eval_data == 'test'
    images, labels = cifar10.inputs(eval_data=eval_data)
    with tf.Session() as sess:
        # Build a Graph that computes the logits predictions from the
        # inference model.
        probabilities = tf.nn.softmax(cifar10.inference(images))

        # Restore the moving average version of the learned variables for eval.
        variable_averages = tf.train.ExponentialMovingAverage(
            cifar10.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            # Restores from checkpoint
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            print('No checkpoint file found')
            return

        # Start the queue runners.
        coord = tf.train.Coordinator()
        try:
            threads = []
            for qr in tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS):
                threads.extend(
                    qr.create_threads(sess,
                                      coord=coord,
                                      daemon=True,
                                      start=True))

            num_iter = int(math.ceil(FLAGS.num_examples / float(FLAGS.batch_size)))

            submission = []
            true_labels = []

            step = 0
            while step < num_iter and not coord.should_stop():
                submission_batch, true_labels_batch = sess.run(
                    [probabilities, labels])
                submission.append(submission_batch)
                true_labels.append(true_labels_batch)
                step += 1

            submission = np.vstack(submission)
            true_labels = np.concatenate(true_labels)

        except Exception as e:  # pylint: disable=broad-except
            coord.request_stop(e)

        coord.request_stop()
        coord.join(threads, stop_grace_period_secs=10)

    return submission, true_labels
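Like the evaluation snippets elsewhere in this collection, `main` restores the exponential-moving-average (shadow) weights rather than the raw ones. `variables_to_restore()` simply builds a name map from the averaged checkpoint entries onto the live variables; a minimal sketch, with an illustrative variable name:

import tensorflow as tf

w = tf.Variable(tf.zeros([2]), name='weights')
ema = tf.train.ExponentialMovingAverage(decay=0.999)
maintain_op = ema.apply([w])  # creates the shadow var weights/ExponentialMovingAverage

# Maps shadow names in the checkpoint onto the live variables, so a
# restore loads the averaged values directly into `w`.
restore_map = ema.variables_to_restore()
print(restore_map)  # {'weights/ExponentialMovingAverage': <tf.Variable 'weights:0' ...>}
saver = tf.train.Saver(restore_map)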
Example no. 39
0
def run(name):
    
    image_name = 'living.jpg'
    image_string = open(image_name, 'rb').read()
    image = tf.image.decode_jpeg(image_string, channels=1)

    if image.dtype != tf.float32:
       image = tf.image.convert_image_dtype(image, dtype=tf.float32)
    
    image = tf.expand_dims(image, 0)
    
    # Save the processed image for Fathom
    with tf.Session() as sess:
        npimage = image.eval()
    np.save('image.npy', npimage.squeeze())

    # Run the network with Tensorflow
    with tf.Graph().as_default():
        image = tf.placeholder("float", [1,45,44,1], name="input")

        #with slim.arg_scope(inception.inception_v1_arg_scope()):
        logits = cifar10.inference(image)
        #probabilities = tf.nn.softmax(logits)
        probabilities = logits
        #init_fn = slim.assign_from_checkpoint_fn('./model.ckpt-19999', slim.get_model_variables('model'))
        
        variable_averages = tf.train.ExponentialMovingAverage(cifar10.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)
        # NOTE: this restore runs in a throwaway session, so the values it
        # loads are discarded when the session closes; only the init_fn below
        # actually populates the session used for inference.
        with tf.Session() as sess:
          ckpt = tf.train.get_checkpoint_state('ecotaxa_train/')
          saver.restore(sess, ckpt.model_checkpoint_path)
        init_fn = slim.assign_from_checkpoint_fn('./ecotaxa_train/model.ckpt-0', tf.global_variables())

        with tf.Session() as sess:
            init_fn(sess)
            prob = probabilities.eval(feed_dict = {"input:0" : npimage})
            # Save the network for Fathom
            saver = tf.train.Saver(tf.global_variables())
            saver.save(sess, name)
        """
        i = 1
        for n in tf.get_default_graph().as_graph_def().node:
          if i < 100:
             print("name:",n.name)
             print("op",n.op)
             print("input:",n.input)
             print("    ")
          i = i +1
        """
    # Dump output
    prob = prob.squeeze()
    probidx = np.argsort(-prob)
    print('Top-2:')
    for i in range(2):
        print(probidx[i], prob[probidx[i]])
def evaluate(images, labels=None):
    with tf.Graph().as_default():

        image_holder = tf.placeholder(tf.uint8, [32, 32, 3])
        float_input = tf.cast(image_holder, tf.float32)
        resized_image = tf.image.resize_image_with_crop_or_pad(
            float_input, IMAGE_SIZE, IMAGE_SIZE)

        # Subtract off the mean and divide by the variance of the pixels.
        float_image = tf.image.per_image_standardization(resized_image)

        # batch size = 1
        float_image_batch = tf.reshape(float_image,
                                       [1, IMAGE_SIZE, IMAGE_SIZE, 3])

        # inference model.
        logits = cifar10.inference(float_image_batch, False)
        softmax = tf.nn.softmax(logits)

        # Restore the moving average version of the learned variables for eval.
        variable_averages = tf.train.ExponentialMovingAverage(
            cifar10.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        with tf.Session() as sess:
            ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
                global_step = ckpt.model_checkpoint_path.split('/')[-1].split(
                    '-')[-1]
            else:
                print('No checkpoint file found')
                return

            total = 0
            correct = 0
            stats = {}
            for cls in CLASSES:
                stats[cls] = 0

            for i, image in enumerate(images):
                total += 1
                softmax_out = sess.run(softmax,
                                       feed_dict={image_holder: image})
                index = np.argmax(softmax_out)
                if labels:
                    if labels[i] == index:
                        correct += 1
                stats[CLASSES[index]] += 1

            for cls in CLASSES:
                print('%-12s: %4d/%d' % (cls, stats[cls], total))
            if labels:
                print('%d/%d, %.1f%%' %
                      (correct, total, correct * 100 / float(total)))
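A hypothetical call site for this `evaluate` (the data below is a stand-in): it expects a sequence of 32x32x3 uint8 arrays, and `labels` should be a plain Python list, since `if labels:` would raise on a NumPy array.

import numpy as np

test_images = [np.zeros((32, 32, 3), dtype=np.uint8)]  # stand-in data
test_labels = [0]                                      # matching class indices
evaluate(test_images, labels=test_labels)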
Example no. 41
0
def train():
  """Train CIFAR-10 for a number of steps."""
  with tf.Graph().as_default():
    global_step = tf.contrib.framework.get_or_create_global_step()

    # Get images and labels for CIFAR-10.
    images, labels = cifar10.distorted_inputs()

    # Build a Graph that computes the logits predictions from the
    # inference model.
    logits = cifar10.inference(images)

    # Calculate loss.
    loss = cifar10.loss(logits, labels)

    # Build a Graph that trains the model with one batch of examples and
    # updates the model parameters.
    train_op = cifar10.train(loss, global_step)

    class _LoggerHook(tf.train.SessionRunHook):
      """Logs loss and runtime."""

      def begin(self):
        self._step = -1

      def before_run(self, run_context):
        self._step += 1
        self._start_time = time.time()
        return tf.train.SessionRunArgs(loss)  # Asks for loss value.

      def after_run(self, run_context, run_values):
        duration = time.time() - self._start_time
        loss_value = run_values.results
        if self._step % 10 == 0:
          num_examples_per_step = FLAGS.batch_size
          examples_per_sec = num_examples_per_step / duration
          sec_per_batch = float(duration)

          format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                        'sec/batch)')
          print (format_str % (datetime.now(), self._step, loss_value,
                               examples_per_sec, sec_per_batch))
          global step_no 
          step_no = self._step

    with tf.train.MonitoredTrainingSession(
        checkpoint_dir=FLAGS.train_dir,
        hooks=[tf.train.StopAtStepHook(last_step=FLAGS.max_steps),
               tf.train.NanTensorHook(loss),
               _LoggerHook()],
        config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement,
            inter_op_parallelism_threads=4,
            intra_op_parallelism_threads=0)) as mon_sess:
      while not mon_sess.should_stop():
        mon_sess.run(train_op)
        """run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
Example no. 42
0
def evaluate():
  """Eval CIFAR-10 for a number of steps."""
  with tf.Graph().as_default() as g:
    eval_data = FLAGS.eval_data == 'test'
    images, labels = cifar10.inputs(eval_data=eval_data)
    logits = cifar10.inference(images)
    top_k_op = tf.nn.in_top_k(logits, labels, 1)
    loss = cifar10.loss(logits, labels)
    saver = tf.train.Saver(tf.all_variables())
    
    while True:
      eval_once(saver, top_k_op, loss)
      if FLAGS.run_once:
        break
      time.sleep(FLAGS.eval_interval_secs)
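This `evaluate` delegates to an `eval_once(saver, top_k_op, loss)` that is not shown here. A sketch of what such a function conventionally does in the CIFAR-10 tutorial family, assuming the same `FLAGS.checkpoint_dir`, `FLAGS.num_examples` and `FLAGS.batch_size` flags used by the surrounding examples:

import math
import numpy as np
import tensorflow as tf

def eval_once(saver, top_k_op, loss):
  """Restores the latest checkpoint and measures top-1 precision once."""
  with tf.Session() as sess:
    ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
    if not (ckpt and ckpt.model_checkpoint_path):
      print('No checkpoint file found')
      return
    saver.restore(sess, ckpt.model_checkpoint_path)

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
      num_iter = int(math.ceil(FLAGS.num_examples / float(FLAGS.batch_size)))
      true_count, total, step = 0, 0, 0
      while step < num_iter and not coord.should_stop():
        predictions, loss_value = sess.run([top_k_op, loss])
        true_count += np.sum(predictions)
        total += predictions.size
        step += 1
      print('precision = %.3f (last loss = %.3f)'
            % (true_count / float(total), loss_value))
    finally:
      coord.request_stop()
      coord.join(threads)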
Example no. 43
0
def train():
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)

        images, labels = cifar10.distorted_inputs()

        logits = cifar10.inference(images)

        loss = cifar10.loss(logits, labels)

        train_op = cifar10.train(loss, global_step)

        saver = tf.train.Saver(tf.all_variables())

        summary_op = tf.merge_all_summaries()

        init = tf.initialize_all_variables()

        sess = tf.Session(config=tf.ConfigProto(log_device_placement=FLAGS.log_device_placement))
        sess.run(init)

        tf.train.start_queue_runners(sess=sess)

        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, graph_def=sess.graph_def)

        for step in xrange(FLAGS.max_steps):
            start_time = time.time()
            _, loss_value = sess.run([train_op, loss])
            duration = time.time() - start_time

            assert not np.isnan(loss_value), "Model diverged with loss = NaN"

            if step % 10 == 0:
                num_examples_per_step = FLAGS.batch_size
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)

                format_str = "%s: step %d, loss = %.2f (%.1f examples/sec; %.3f sec/batch)"
                print(format_str % (datetime.now(), step, loss_value, examples_per_sec, sec_per_batch))

            if step % 100 == 0:
                summary_str = sess.run(summary_op)
                summary_writer.add_summary(summary_str, step)

            if step % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                checkpoint_path = os.path.join(FLAGS.train_dir, "model.ckpt")
                saver.save(sess, checkpoint_path, global_step=step)
Example no. 44
0
def evaluate():
    """Eval CIFAR-10 for a number of steps."""
    with tf.Graph().as_default() as g:
        # Get images and labels for CIFAR-10.
        eval_data = FLAGS.eval_data == "test"
        print(eval_data)
        images, labels, ground_truth = cifar10.inputs(eval_data=eval_data)
        # Build a Graph that computes the logits predictions from the
        # inference model.
        logits, _ = cifar10.inference(images)
        print(logits)
        print(logits.get_shape())
        print("after inference node creation")
        loss = cifar10.loss(logits, labels)
        accuracy, precision, accuracies = cifar10.accuracy(logits, ground_truth)
        labels = tf.cast(labels, tf.int64)

        label_shape = labels.get_shape().as_list()
        reshaped_labels = tf.reshape(labels, [label_shape[0] * label_shape[1] * label_shape[2]])
        logits_shape = logits.get_shape().as_list()
        reshaped_logits = tf.reshape(logits, [logits_shape[0] * logits_shape[1] * logits_shape[2], logits_shape[3]])

        # Calculate predictions.
        # top_k_op = tf.nn.in_top_k(logits, labels, 1)
        # top_k_op = tf.nn.in_top_k(reshaped_logits, reshaped_labels, 1)

        # Restore the moving average version of the learned variables for eval.
        variable_averages = tf.train.ExponentialMovingAverage(cifar10.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.merge_all_summaries()

        summary_writer = tf.train.SummaryWriter(FLAGS.eval_dir, g)

        while True:
            print("evaluate:")
            eval_once(saver, summary_writer, summary_op, accuracy, precision, accuracies)
            if FLAGS.run_once:
                break
            time.sleep(FLAGS.eval_interval_secs)
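The reshaping above flattens a dense prediction map so that every pixel becomes its own example; that is what the commented-out `in_top_k` would consume. A self-contained sketch with illustrative shapes (batch of 2, 4x4 maps, 3 classes):

import tensorflow as tf

logits = tf.random_normal([2, 4, 4, 3])        # [batch, h, w, classes]
labels = tf.zeros([2, 4, 4], dtype=tf.int64)   # [batch, h, w]

# One row per pixel, so in_top_k treats each pixel as an example.
flat_logits = tf.reshape(logits, [2 * 4 * 4, 3])
flat_labels = tf.reshape(labels, [2 * 4 * 4])

correct = tf.nn.in_top_k(flat_logits, flat_labels, 1)   # bool per pixel
pixel_accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

with tf.Session() as sess:
  print(sess.run(pixel_accuracy))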
Example no. 45
0
def evaluate():
    with tf.Graph().as_default():
        eval_data = FLAGS.eval_data == 'test'
        images, labels = cifar10.inputs(eval_data=eval_data)
        
        logits = cifar10.inference(images)
        
        top_k_op = tf.nn.in_top_k(logits, labels, 1)
        
        variable_averages = tf.train.ExponentialMovingAverage(cifar10.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)
        
        summary_op = tf.merge_all_summaries()
        
        graph_def = tf.get_default_graph().as_graph_def()
        summary_writer = tf.train.SummaryWriter(FLAGS.eval_dir, graph_def=graph_def)
        
        while True:
            eval_once(saver, summary_writer, top_k_op, summary_op)
            if FLAGS.run_once:
                break
            time.sleep(FLAGS.eval_interval_secs)
Example no. 46
0
def train():
  """Train a model for a number of steps."""
  with tf.Graph().as_default():
    global_step = tf.Variable(0, trainable=False)

    # Get images and labels for a segmentation model.
    images, labels, ground_truth = cifar10.distorted_inputs()
    tf.histogram_summary('label_hist/with_ignore', labels)
    tf.histogram_summary('label_hist/ground_truth', ground_truth)
    
    # Build a Graph that computes the logits predictions from the
    # inference model.
    print("before inference")
    print(images.get_shape())
    logits, nr_params = cifar10.inference(images)
    print("nr_params: "+str(nr_params) )
    print("after inference")
    # Calculate loss.
    loss = cifar10.loss(logits, labels)
    accuracy, precision, cat_accs = cifar10.accuracy(logits, ground_truth)

    # Build a Graph that trains the model with one batch of examples and
    # updates the model parameters.
    train_op = cifar10.train(loss, global_step)

    # Create a saver.
    saver = tf.train.Saver(tf.all_variables())
#    tf.image_summary('images2', images)
    print (logits)
#    tf.image_summary('predictions', logits)

    # Build the summary operation based on the TF collection of Summaries.
    summary_op = tf.merge_all_summaries()

    # Build an initialization operation to run below.
    init = tf.initialize_all_variables()

    # Start running operations on the Graph.
    sess = tf.Session(config=tf.ConfigProto(
        log_device_placement=FLAGS.log_device_placement))
    ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
    if ckpt and ckpt.model_checkpoint_path:
      # Restores from checkpoint
      saver.restore(sess, ckpt.model_checkpoint_path)
      # Assuming model_checkpoint_path looks something like:
      #   /my-favorite-path/cifar10_train/model.ckpt-0,
      # extract global_step from it.
      global_step = int(ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1])
    else:
      print('No checkpoint file found')
      print('Initializing new model')
      sess.run(init)
      global_step = 0


    # Start the queue runners.
    tf.train.start_queue_runners(sess=sess)

    summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph)

    for step in xrange(global_step, FLAGS.max_steps):
      start_time = time.time()
      _, loss_value, accuracy_value, precision_value, cat_accs_val = sess.run(
          [train_op, loss, accuracy, precision, cat_accs])
      duration = time.time() - start_time

      print (precision_value)
      print (cat_accs_val)
      
      assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

      #precision_value = [0 if np.isnan(p) else p for p in precision_value]
      #print (precision_value)
      if step % 10 == 0:
        num_examples_per_step = FLAGS.batch_size
        examples_per_sec = num_examples_per_step / duration
        sec_per_batch = float(duration)

        format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                      'sec/batch)\n Accuracy = %.4f, mean average precision = %.4f')
        print (format_str % (datetime.now(), step, loss_value,
                             examples_per_sec, sec_per_batch,
                             accuracy_value, np.mean(precision_value)))

      if step % 100 == 0:
        summary_str = sess.run(summary_op)
        summary_writer.add_summary(summary_str, step)

        summary = tf.Summary()
        summary.value.add(tag='Accuracy (raw)', simple_value=float(accuracy_value))
        for i,s in enumerate(CLASSES):
          summary.value.add(tag="precision/"+s+" (raw)",simple_value=float(precision_value[i]))
          summary.value.add(tag="accs/"+s+" (raw)",simple_value=float(cat_accs_val[i]))
#        summary.value.add(tag='Human precision (raw)', simple_value=float(precision_value))
        summary_writer.add_summary(summary, step)
        print("hundred steps")
      # Save the model checkpoint periodically.
      if step % 1000 == 0 or (step + 1) == FLAGS.max_steps:
        print("thousand steps")
        checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
        saver.save(sess, checkpoint_path, global_step=step)
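The loop above logs values computed in Python (raw accuracy, per-class precision) by building `tf.Summary` protos by hand instead of running a graph summary op. The pattern is worth isolating; the helper name below is ours, not from the example:

import tensorflow as tf

def write_raw_scalar(summary_writer, tag, value, step):
  """Logs a plain Python number to TensorBoard without a summary op."""
  summary = tf.Summary()
  summary.value.add(tag=tag, simple_value=float(value))
  summary_writer.add_summary(summary, step)

# e.g. write_raw_scalar(summary_writer, 'Accuracy (raw)', 0.91, step)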
def main_fun(argv, ctx):
  import tensorflow as tf
  import cifar10

  sys.argv = argv
  FLAGS = tf.app.flags.FLAGS
  tf.app.flags.DEFINE_string('train_dir', '/tmp/cifar10_train',
                             """Directory where to write event logs """
                             """and checkpoint.""")
  tf.app.flags.DEFINE_integer('max_steps', 1000000,
                              """Number of batches to run.""")
  tf.app.flags.DEFINE_boolean('log_device_placement', False,
                              """Whether to log device placement.""")
  tf.app.flags.DEFINE_boolean('rdma', False, """Whether to use rdma.""")

  # cifar10.maybe_download_and_extract()
  if tf.gfile.Exists(FLAGS.train_dir):
    tf.gfile.DeleteRecursively(FLAGS.train_dir)
  tf.gfile.MakeDirs(FLAGS.train_dir)

  cluster_spec, server = TFNode.start_cluster_server(ctx, 1, FLAGS.rdma)

  # Train CIFAR-10 for a number of steps.
  with tf.Graph().as_default():
    global_step = tf.contrib.framework.get_or_create_global_step()

    # Get images and labels for CIFAR-10.
    images, labels = cifar10.distorted_inputs()

    # Build a Graph that computes the logits predictions from the
    # inference model.
    logits = cifar10.inference(images)

    # Calculate loss.
    loss = cifar10.loss(logits, labels)

    # Build a Graph that trains the model with one batch of examples and
    # updates the model parameters.
    train_op = cifar10.train(loss, global_step)

    class _LoggerHook(tf.train.SessionRunHook):
      """Logs loss and runtime."""

      def begin(self):
        self._step = -1

      def before_run(self, run_context):
        self._step += 1
        self._start_time = time.time()
        return tf.train.SessionRunArgs(loss)  # Asks for loss value.

      def after_run(self, run_context, run_values):
        duration = time.time() - self._start_time
        loss_value = run_values.results
        if self._step % 10 == 0:
          num_examples_per_step = FLAGS.batch_size
          examples_per_sec = num_examples_per_step / duration
          sec_per_batch = float(duration)

          format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                        'sec/batch)')
          print (format_str % (datetime.now(), self._step, loss_value,
                               examples_per_sec, sec_per_batch))

    with tf.train.MonitoredTrainingSession(
        checkpoint_dir=FLAGS.train_dir,
        hooks=[tf.train.StopAtStepHook(last_step=FLAGS.max_steps),
               tf.train.NanTensorHook(loss),
               _LoggerHook()],
        config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement)) as mon_sess:
      while not mon_sess.should_stop():
        mon_sess.run(train_op)
def main(_):

    class _LoggerHook(tf.train.SessionRunHook):
        """Logs loss and runtime."""

        def begin(self):
            self._step = -1

        def before_run(self, run_context):
            self._step += 1
            self._start_time = time.time()
            return tf.train.SessionRunArgs(loss)  # Asks for loss value.

        def after_run(self, run_context, run_values):
            duration = time.time() - self._start_time
            loss_value = run_values.results
            if self._step % 10 == 0:
                num_examples_per_step = FLAGS.batch_size
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)

                format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                            'sec/batch)')
                print (format_str % (datetime.now(), self._step, loss_value,
                                    examples_per_sec, sec_per_batch))
    ps_hosts = FLAGS.ps_hosts.split(",")
    worker_hosts = FLAGS.worker_hosts.split(",")

    # Create a cluster from the parameter server and worker hosts.
    cluster = tf.train.ClusterSpec({"ps": ps_hosts, "worker": worker_hosts})

    # Create and start a server for the local task.
    server = tf.train.Server(cluster,
                            job_name=FLAGS.job_name,
                            task_index=FLAGS.task_index)

    if FLAGS.job_name == "ps":
        server.join()
    elif FLAGS.job_name == "worker":

        # Assigns ops to the local worker by default.
        with tf.device(tf.train.replica_device_setter(
            worker_device="/job:worker/task:%d" % FLAGS.task_index,
            cluster=cluster)):

            global_step = tf.contrib.framework.get_or_create_global_step()

            # Get images and labels for CIFAR-10.
            images, labels = cifar10.distorted_inputs()

            # Build inference Graph.
            logits = cifar10.inference(images)

            # Build the portion of the Graph calculating the losses. Note that we will
            # assemble the total_loss using a custom function below.
            loss = cifar10.loss(logits, labels)

            # Build a Graph that trains the model with one batch of examples and
            # updates the model parameters.
            train_op = cifar10.train(loss,global_step)

        # The StopAtStepHook handles stopping after running given steps.
        hooks=[tf.train.StopAtStepHook(last_step=FLAGS.max_steps), _LoggerHook()]

        # The MonitoredTrainingSession takes care of session initialization,
        # restoring from a checkpoint, saving to a checkpoint, and closing when done
        # or an error occurs.
        with tf.train.MonitoredTrainingSession(master=server.target,
                                                is_chief=(FLAGS.task_index == 0),
                                                checkpoint_dir=FLAGS.train_dir,
                                                save_checkpoint_secs=60,
                                                hooks=hooks) as mon_sess:
            while not mon_sess.should_stop():
                # Run a training step asynchronously.
                # See `tf.train.SyncReplicasOptimizer` for additional details on how to
                # perform *synchronous* training.
                # mon_sess.run handles AbortedError in case of preempted PS.
                mon_sess.run(train_op)
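This distributed `main` reads its cluster layout from flags defined elsewhere in the file. A plausible definition block, assuming the standard `tf.app.flags` pattern used throughout these examples (all default values are illustrative):

import tensorflow as tf

FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string('ps_hosts', 'localhost:2222',
                           'Comma-separated parameter server hosts.')
tf.app.flags.DEFINE_string('worker_hosts', 'localhost:2223,localhost:2224',
                           'Comma-separated worker hosts.')
tf.app.flags.DEFINE_string('job_name', 'worker', "Either 'ps' or 'worker'.")
tf.app.flags.DEFINE_integer('task_index', 0, 'Index of the task within its job.')
tf.app.flags.DEFINE_string('train_dir', '/tmp/cifar10_train', 'Checkpoint directory.')
tf.app.flags.DEFINE_integer('max_steps', 100000, 'Number of batches to run.')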
def train():
  """Train CIFAR-10 for a number of steps."""
  with tf.Graph().as_default():

    global_step = tf.Variable(0, trainable=False)

    # Get images and labels for CIFAR-10.
    # images, labels = cifar10.standard_distorted_inputs()
    inputs = cifar10.ram_inputs(unit_variance=True, is_train=True)
    images = inputs['images']
    labels = inputs['labels']

    # Batch generator
    batcher = cifar10.Cifar10BatchGenerator(
        inputs['data_images'], inputs['data_labels'], True,
        FLAGS.max_epochs)

    # Build a Graph that computes the logits predictions from the
    # inference model.
    logits = cifar10.inference(images, 3, use_batchnorm=True,
        use_nrelu=False, id_decay=False, add_shortcuts=True, is_train=True)

    # Calculate loss.
    loss = cifar10.loss(logits, labels)

    # Build a Graph that trains the model with one batch of examples and
    # updates the model parameters.
    train_op = cifar10.train(loss, global_step)

    # Create a saver.
    saver = tf.train.Saver(tf.all_variables())

    # Build the summary operation based on the TF collection of Summaries.
    summary_op = tf.merge_all_summaries()

    # Build an initialization operation to run below.
    init = tf.initialize_all_variables()

    # Start running operations on the Graph.
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5)

    sess = tf.Session(config=tf.ConfigProto(
        log_device_placement=FLAGS.log_device_placement,
        gpu_options=gpu_options))
  
    sess.run(init)

    # Start the queue runners.
    tf.train.start_queue_runners(sess=sess)

    summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph)

    step = -1
    while not batcher.is_done():
      step += 1

      batch_im, batch_labs = batcher.next_batch()
      feed_dict = {
          inputs['images_pl']: batch_im,
          inputs['labels_pl']: batch_labs,
        }

      start_time = time.time()
      _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)

      duration = time.time() - start_time

      assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

      if step % 10 == 0:
        num_examples_per_step = FLAGS.batch_size
        examples_per_sec = num_examples_per_step / duration
        sec_per_batch = float(duration)

        format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                      'sec/batch)')
        print (format_str % (datetime.now(), step, loss_value,
                             examples_per_sec, sec_per_batch))

      if step % 10 == 0:
        summary_str = sess.run(summary_op, feed_dict=feed_dict)
        summary_writer.add_summary(summary_str, step)

      # Save the model checkpoint periodically.
      if step % 10 == 0 or batcher.is_done():
        checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
        saver.save(sess, checkpoint_path, global_step=step)
Example no. 50
0
import tensorflow as tf
import cifar10
from PIL import Image

FLAGS = tf.app.flags.FLAGS
FLAGS.batch_size = 1

filename = tf.placeholder(tf.string)

value = tf.read_file(filename)

images = tf.image.decode_png(value, channels = 3)

resized_images = tf.image.resize_images(images, 124, 124)
img = tf.image.per_image_whitening(resized_images)

logits = cifar10.inference(tf.expand_dims(img, 0))

outputs = tf.nn.softmax(logits)
k_pl = tf.placeholder(tf.int32)
top_k_op = tf.nn.top_k(outputs, k = k_pl)

def classify(session, file_name, k = 1):
    values, indices = session.run(top_k_op, feed_dict = {
        filename: file_name,
        k_pl: k
        })
    return zip(indices.flatten().tolist(), values.flatten().tolist())

def session_with_checkpoint(checkpoint_dir = "./train_old", gpu_enabled = True):
    # disable gpu
    config = tf.ConfigProto(
Example no. 51
0
def train():

  # # debug

  # true_classes = np.ndarray(shape=(FLAGS.batch_size, 1), dtype=int)
  # true_classes.fill(2)


  # # Create a pair of constant ops, add the numpy 
  # # array matrices.
  # true_classes_tf_matrix = tf.constant(true_classes, dtype=tf.int64)

  # # playing with introducing the sampler
  # classes_sampler = tf.nn.learned_unigram_candidate_sampler(
  #                                     true_classes_tf_matrix, 
  #                                     1,                # true_classes
  #                                     5,                # num_sampled
  #                                     False,            # unique
  #                                     10,               # range_max
  #                                     seed=None, 
  #                                     name="my_classes_sampler")

  # # print(classes_sampler)
  # # print("debug")
  # # print(classes_sampler.set_sampler)
  # # exit()

  """Train CIFAR-10 for a number of steps."""
  with tf.Graph().as_default():
    global_step = tf.Variable(0, trainable=False)

    # Get images and labels for CIFAR-10.
    images, labels = cifar10.distorted_inputs()

    # print("images")
    # print(images)
    # images = tf.Print(images, [images])
    # print()
    # print(images[1])

    print("------------------- train calling interference ---------------------")
    print(cifar10.__file__)

    # Build a Graph that computes the logits predictions from the
    # inference model.
    logits = cifar10.inference(images)

    # Calculate loss.
    loss = cifar10.loss(logits, labels)

    # Build a Graph that trains the model with one batch of examples and
    # updates the model parameters.
    train_op = cifar10.train(loss, global_step)

    # Create a saver.
    saver = tf.train.Saver(tf.all_variables())

    # Build the summary operation based on the TF collection of Summaries.
    summary_op = tf.merge_all_summaries()

    # Build an initialization operation to run below.
    init = tf.initialize_all_variables()

    # Start running operations on the Graph.
    sess = tf.Session(config=tf.ConfigProto(
        log_device_placement=FLAGS.log_device_placement))
    sess.run(init)

    # Start the queue runners.
    tf.train.start_queue_runners(sess=sess)

    summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph)

    for step in xrange(FLAGS.max_steps):

      # manually load the contents of images and labels
      # before calling this sess.run()
      # 1. have Cifar10 dataset in memory
      # 2. create a mini-batch
      # 3. set the placeholders/vars to the the mini-batch data
      # 4. run one forward-backward step

      # print("training step: " + str(step))

      start_time = time.time()
      _, loss_value = sess.run([train_op, loss])
      duration = time.time() - start_time

      assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

      if step % 10 == 0:
        num_examples_per_step = FLAGS.batch_size
        examples_per_sec = num_examples_per_step / duration
        sec_per_batch = float(duration)

        format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                      'sec/batch)')
        print (format_str % (datetime.now(), step, loss_value,
                             examples_per_sec, sec_per_batch))

      # debug, temp change, go back to the one below
      summary_str = sess.run(summary_op)
      # print("summary: " + summary_str)
      summary_writer.add_summary(summary_str, step)
      summary_writer.flush()

      # if step % 100 == 0:
      #   summary_str = sess.run(summary_op)
      #   # print("summary: " + summary_str)
      #   summary_writer.add_summary(summary_str, step)
      #   summary_writer.flush()

      # Save the model checkpoint periodically.
      if step % 1000 == 0 or (step + 1) == FLAGS.max_steps:
        checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
        saver.save(sess, checkpoint_path, global_step=step)
Example no. 52
0
def train(lambs):
    """Train CIFAR-10 for a number of steps."""
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)
        # Get images and labels for CIFAR-10.
        images, labels = cifar10.distorted_inputs()

        # Build a Graph that computes the logits predictions from the
        # inference model.
        logits = cifar10.inference(images)

        # Calculate loss.
        lambs = tf.constant(lambs, dtype=tf.float32)
        loss = infor.loss(logits, labels, lambs)

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        train_op, lr_op = cifar10.train(loss, global_step)

        # Create a saver.
        saver = tf.train.Saver(tf.all_variables())

        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement))

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.merge_all_summaries()

        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            # Restores from checkpoint
            saver.restore(sess, ckpt.model_checkpoint_path)
            # Assuming model_checkpoint_path looks something like:
            #   /my-favorite-path/cifar10_train/model.ckpt-0,
            # extract global_step from it.
            previous_step = int(ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1])
            print('Start training from previous checkpoint')
        else:
            print('Start training from step 0')
            previous_step = 0
            init = tf.initialize_all_variables()
            sess.run(init)

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir,
                                                graph_def=sess.graph_def)

        step = previous_step
        while step <= FLAGS.max_steps:
            start_time = time.time()
            _, loss_value, lr_value = sess.run([train_op, loss, lr_op])
            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % 10 == 0:
                num_examples_per_step = FLAGS.batch_size
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)

                format_str = ('%s: step %d, loss = %.2f, lr_value = %.4f (%.1f examples/sec; %.3f '
                              'sec/batch)')
                print(format_str % (datetime.now(), step, loss_value, lr_value,
                                    examples_per_sec, sec_per_batch))

            if step % 100 == 0:
                summary_str = sess.run(summary_op)
                summary_writer.add_summary(summary_str, step)

            # Save the model checkpoint periodically.
            if step % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)

            step += 1
Example no. 53
0
def train():
  """Train CIFAR-10 for a number of steps."""
  with tf.Graph().as_default():
    global_step = tf.Variable(0, trainable=False)

    # Get images and labels for CIFAR-10.
    images, labels = cifar10.distorted_inputs()

    # Build a Graph that computes the logits predictions from the
    # inference model.
    logits = cifar10.inference(images)

    # Calculate loss.
    loss = cifar10.loss(logits, labels)

    # Build a Graph that trains the model with one batch of examples and
    # updates the model parameters.
    train_op = cifar10.train(loss, global_step)

    # Create a saver.
    saver = tf.train.Saver(tf.all_variables())

    # Build the summary operation based on the TF collection of Summaries.
    summary_op = tf.merge_all_summaries()

    # # Visualize conv1 features
    # with tf.variable_scope('conv1') as scope_conv:
    #     #tf.get_variable_scope().reuse_variables()
    #     scope_conv.reuse_variables()
    #     weights = tf.get_variable('weights')
    #     grid_x = grid_y = 8   # to get a square grid for 64 conv1 features
    #     grid = put_kernels_on_grid (weights, (grid_y, grid_x))
    #     tf.image_summary('conv1/features', grid, max_images=1)

    # Build an initialization operation to run below.
    init = tf.initialize_all_variables()

    # Start running operations on the Graph.
    sess = tf.Session(config=tf.ConfigProto(
        log_device_placement=FLAGS.log_device_placement))
    sess.run(init)

    # Start the queue runners.
    tf.train.start_queue_runners(sess=sess)

    summary_writer = tf.train.SummaryWriter(FLAGS.train_dir,
                                            graph_def=sess.graph_def)

    for step in xrange(FLAGS.max_steps):
      start_time = time.time()
      _, loss_value = sess.run([train_op, loss])
      duration = time.time() - start_time

      assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

      if step % 10 == 0:
        num_examples_per_step = FLAGS.batch_size
        examples_per_sec = num_examples_per_step / float(duration)
        sec_per_batch = float(duration)

        format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                      'sec/batch)')
        print (format_str % (datetime.now(), step, loss_value,
                             examples_per_sec, sec_per_batch))

      if step % 100 == 0:
        summary_str = sess.run(summary_op)
        summary_writer.add_summary(summary_str, step)

      # Save the model checkpoint periodically.
      if step % 1000 == 0 or (step + 1) == FLAGS.max_steps:
        checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
        saver.save(sess, checkpoint_path, global_step=step)
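The commented-out conv1 visualization in this example depends on a `put_kernels_on_grid` helper that is not included. A simplified reconstruction of the idea, tiling first-layer filters into one image for the era-appropriate `tf.image_summary` (the 8x8 grid assumes 64 filters; this is our sketch, not the original helper):

import tensorflow as tf

def put_kernels_on_grid(kernel, grid_y, grid_x, pad=1):
  """Tiles [h, w, c, n] conv kernels (n == grid_y * grid_x) into [1, H, W, c]."""
  # Scale to [0, 1] for display.
  k_min = tf.reduce_min(kernel)
  k_max = tf.reduce_max(kernel)
  kernel = (kernel - k_min) / (k_max - k_min)

  # Pad each filter, then read the padded static shape.
  kernel = tf.pad(kernel, [[pad, pad], [pad, pad], [0, 0], [0, 0]])
  h, w, c, n = kernel.get_shape().as_list()

  kernel = tf.transpose(kernel, [3, 0, 1, 2])            # [n, h, w, c]
  kernel = tf.reshape(kernel, [grid_y, grid_x, h, w, c])
  kernel = tf.transpose(kernel, [0, 2, 1, 3, 4])         # [gy, h, gx, w, c]
  return tf.reshape(kernel, [1, grid_y * h, grid_x * w, c])

# Usage inside the training graph (weights shape e.g. [5, 5, 3, 64]):
#   with tf.variable_scope('conv1', reuse=True):
#     weights = tf.get_variable('weights')
#   grid = put_kernels_on_grid(weights, grid_y=8, grid_x=8)
#   tf.image_summary('conv1/features', grid, max_images=1)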