def train(): """Train CIFAR-10 for a number of steps.""" with tf.Graph().as_default(): global_step = tf.Variable(0, trainable=False) # Get images and labels for CIFAR-10. images, labels = cifar10.distorted_inputs() testImg, testlabels = cifar10.inputs(eval_data=True) # Build a Graph that computes the logits predictions from the # inference model. logits = cifar10.inference(images) test_pre = cifar10.inference(testImg,test=True) # Calculate loss. loss = cifar10.loss(logits, labels) # Build a Graph that trains the model with one batch of examples and # updates the model parameters. train_op = cifar10.train(loss, global_step) # Create a saver. saver = tf.train.Saver(tf.all_variables()) # Build the summary operation based on the TF collection of Summaries. summary_op = tf.merge_all_summaries() # Build an initialization operation to run below. init = tf.initialize_all_variables() # Start running operations on the Graph. sess = tf.Session(config=tf.ConfigProto( log_device_placement=FLAGS.log_device_placement)) sess.run(init) # Start the queue runners. tf.train.start_queue_runners(sess=sess) summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph) for step in xrange(FLAGS.max_steps): start_time = time.time() _, loss_value = sess.run([train_op, loss]) duration = time.time() - start_time if step % 10 == 0: print ('loss '+str(loss_value)) if step % 100 == 0: summary_str = sess.run(summary_op) summary_writer.add_summary(summary_str, step) # Save the model checkpoint periodically. if step % 10 == 0 or (step + 1) == FLAGS.max_steps: checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt') saver.save(sess, checkpoint_path, global_step=step) #eval if step%10==0: cifar10.accuracy(test_pre,testlabels)
def train(): """Train CIFAR-10 for a number of steps.""" with tf.Graph().as_default(): # with tf.variable_scope("cifar10", reuse=tf.AUTO_REUSE) as scope: global_step = tf.train.get_or_create_global_step() # Get images and labels for CIFAR-10. train_flag = tf.placeholder(tf.bool, shape = ()) trX, trY = cifar10.distorted_inputs() teX, teY = cifar10.inputs(eval_data = True) # Build a Graph that computes the logits predictions from the # inference model. logits = cifar10.inference(trX) # Calculate accuracy tr_acc = cifar10.accuracy(logits, trY)[1] print(tr_acc, "tr_acc\n") # tr_acc_sum = tf.summary.scalar('train/accuracy', tr_acc) # Calculate loss. loss = cifar10.loss(logits, trY) # Build a Graph that trains the model with one batch of examples and # updates the model parameters. train_op = cifar10.train(loss, global_step) tf.get_variable_scope().reuse_variables() eval_logits = cifar10.inference(teX) te_acc = cifar10.accuracy(eval_logits, teY)[1] print(te_acc, "te_acc\n") # te_acc_sum = tf.summary.scalar('test/accuracy', te_acc) accuracy = tf.cond(train_flag, lambda: tr_acc, lambda: te_acc) tf.summary.scalar("accuracy", accuracy) merged = tf.summary.merge_all() train_writer = tf.summary.FileWriter('tmp/cifar10/train') test_writer = tf.summary.FileWriter('tmp/cifar10/test') print("Training Starts") # Configs config = tf.ConfigProto(log_device_placement=FLAGS.log_device_placement) config.gpu_options.allow_growth=True mon_sess = tf.train.MonitoredTrainingSession( hooks=[tf.train.StopAtStepHook(last_step=FLAGS.max_steps), tf.train.NanTensorHook(loss)],config=config) step = -1 while not mon_sess.should_stop(): step += 1 _,loss_value = mon_sess.run([train_op,loss]) if step % FLAGS.log_frequency == 0: tr_acc,summary = mon_sess.run([accuracy,merged], feed_dict = {train_flag : True}) train_writer.add_summary(summary, step) te_acc, summary = mon_sess.run([accuracy, merged], feed_dict = {train_flag : False}) test_writer.add_summary(summary, step) format_str = ('%s: step %d, loss = %.2f, test accuracy = %.2f, train accuracy = %.2f') print (format_str % (datetime.now(), step, loss_value, te_acc, tr_acc))
def train(): """Train CIFAR-10 for a number of steps.""" with tf.Graph().as_default(): global_step = tf.contrib.framework.get_or_create_global_step() # Get images and labels for CIFAR-10. images, labels = cifar10.distorted_inputs() # Build a Graph that computes the logits predictions from the # inference model. logits = cifar10.inference(images, labels) # Calculate loss. loss = cifar10.loss(logits, labels) # Calculate accuracy. accuracy = cifar10.accuracy(logits, labels) # Build a Graph that trains the model with one batch of examples and # updates the model parameters. train_op = cifar10.train(loss, global_step) class _LoggerHook(tf.train.SessionRunHook): """Logs loss and runtime.""" def begin(self): self._step = -1 self._start_time = time.time() def before_run(self, run_context): self._step += 1 return tf.train.SessionRunArgs( accuracy) # Asks for loss value. def after_run(self, run_context, run_values): if self._step % FLAGS.log_frequency == 0: current_time = time.time() duration = current_time - self._start_time self._start_time = current_time loss_value = run_values.results examples_per_sec = FLAGS.log_frequency * FLAGS.batch_size / duration sec_per_batch = float(duration / FLAGS.log_frequency) format_str = ( '%s: step %d, accuracy = %.4f, (%.1f examples/sec; %.3f ' 'sec/batch)') print(format_str % (datetime.now(), self._step, loss_value, examples_per_sec, sec_per_batch)) with tf.train.MonitoredTrainingSession( checkpoint_dir=FLAGS.train_dir, hooks=[ tf.train.StopAtStepHook(last_step=FLAGS.max_steps), tf.train.NanTensorHook(loss), _LoggerHook() ], config=tf.ConfigProto(log_device_placement=FLAGS. log_device_placement)) as mon_sess: while not mon_sess.should_stop(): mon_sess.run(train_op)
def trainning():
  """Train CIFAR-10 with a feed_dict-based input pipeline."""
  (X, Y), (X_test, Y_test) = cifar10.load_data()
  Y = cifar10.to_categorical(Y, 10)
  Y_test = cifar10.to_categorical(Y_test, 10)
  data_set = cifar10.read_data_sets(X, Y, X_test, Y_test)

  # Flattened 32x32 RGB images and one-hot labels.
  x_placeholder = tf.placeholder("float", [None, 32 * 32 * 3])
  y_placeholder = tf.placeholder("float", [None, 10])

  logits = cifar10.inference(x_placeholder)
  loss = cifar10.loss(logits, y_placeholder)
  train_op = cifar10.train_op(loss=loss, learning_rate=0.001)
  accuracy = cifar10.accuracy(logits, y_placeholder)

  init = tf.initialize_all_variables()
  with tf.Session() as sess:
    sess.run(init)
    for step in range(MAX_STEPS):
      batch_x, batch_y = data_set.train.next_batch(96)
      _, loss_value, acc = sess.run(
          [train_op, loss, accuracy],
          feed_dict={x_placeholder: batch_x, y_placeholder: batch_y})
      if (step + 1) % 100 == 0:
        print("step: {:d} loss: {:f} acc: {:f}".format(step + 1, loss_value, acc))
def evaluate(): """Eval CIFAR-10 for a number of steps.""" dataset = input_data.read(FLAGS.input_dir) image_size = dataset.image_size with tf.Graph().as_default(): # Build a Graph that computes the logits predictions from the # inference model. eval_images = tf.placeholder(tf.float32, shape=(2, FLAGS.batch_size, image_size[0], image_size[1], image_size[2])) labels = tf.placeholder(tf.float32, shape=(FLAGS.batch_size)) images, images_p = tf.split(0, 2, train_images) with tf.variable_scope('inference') as scope: logits = cifar10.inference(images) scope.reuse_variables() logits2 = cifar10.inference(images_p) # Calculate predictions. accuracy = cifar10.accuracy(logits, logits2, labels) # Restore the moving average version of the learned variables for eval. variable_averages = tf.train.ExponentialMovingAverage( cifar10.MOVING_AVERAGE_DECAY) variables_to_restore = variable_averages.variables_to_restore() saver = tf.train.Saver(variables_to_restore) # Build the summary operation based on the TF collection of Summaries. summary_op = tf.merge_all_summaries() graph_def = tf.get_default_graph().as_graph_def() summary_writer = tf.train.SummaryWriter(FLAGS.eval_dir, graph_def=graph_def)
def train():
  with tf.Graph().as_default():
    global_step = tf.train.get_or_create_global_step()
    images, labels = cifar10.distorted_inputs()
    logits = cifar10.inference(images, train=True)
    loss = cifar10.loss(logits, labels)
    accuracy = cifar10.accuracy(logits, labels)
    train_op = cifar10.train(loss, global_step)

    class _LoggerHook(tf.train.SessionRunHook):
      """Logs loss and accuracy every 10 steps."""

      def begin(self):
        self._step = -1

      def before_run(self, run_context):
        self._step += 1
        return tf.train.SessionRunArgs([loss, accuracy])

      def after_run(self, run_context, run_values):
        if self._step % 10 == 0:
          loss_value, acc_value = run_values.results
          format_str = 'step %d, loss = %.2f, accuracy = %.2f'
          print(format_str % (self._step, loss_value, acc_value))

    with tf.train.MonitoredTrainingSession(
        checkpoint_dir=train_dir,
        hooks=[tf.train.StopAtStepHook(last_step=max_step),
               tf.train.NanTensorHook(loss),
               _LoggerHook()],
        config=tf.ConfigProto(
            log_device_placement=False)) as mon_sess:
      while not mon_sess.should_stop():
        mon_sess.run(train_op)
def evaluate(): """Eval CIFAR-10 for a number of steps.""" with tf.Graph().as_default() as g: # Get images and labels for CIFAR-10. eval_data = FLAGS.eval_data == "test" print(eval_data) images, labels, ground_truth = cifar10.inputs(eval_data=eval_data) # Build a Graph that computes the logits predictions from the # inference model. logits, _ = cifar10.inference(images) print(logits) print(logits.get_shape()) print("after inference node creation") loss = cifar10.loss(logits, labels) accuracy, precision, accuracies = cifar10.accuracy(logits, ground_truth) labels = tf.cast(labels, tf.int64) label_shape = labels.get_shape().as_list() reshaped_labels = tf.reshape(labels, [label_shape[0] * label_shape[1] * label_shape[2]]) logits_shape = logits.get_shape().as_list() reshaped_logits = tf.reshape(logits, [logits_shape[0] * logits_shape[1] * logits_shape[2], logits_shape[3]]) # Calculate predictions. # top_k_op = tf.nn.in_top_k(logits, labels, 1) # top_k_op = tf.nn.in_top_k(reshaped_logits, reshaped_labels, 1) # Restore the moving average version of the learned variables for eval. variable_averages = tf.train.ExponentialMovingAverage(cifar10.MOVING_AVERAGE_DECAY) variables_to_restore = variable_averages.variables_to_restore() saver = tf.train.Saver(variables_to_restore) # Build the summary operation based on the TF collection of Summaries. summary_op = tf.merge_all_summaries() summary_writer = tf.train.SummaryWriter(FLAGS.eval_dir, g) while True: print("evaluate:") eval_once(saver, summary_writer, summary_op, accuracy, precision, accuracies) if FLAGS.run_once: break time.sleep(FLAGS.eval_interval_secs)
def train(): """Train a model for a number of steps.""" with tf.Graph().as_default(): global_step = tf.Variable(0, trainable=False) # Get images and labels for a segmentation model. images, labels, ground_truth = cifar10.distorted_inputs() tf.histogram_summary('label_hist/with_ignore', labels) tf.histogram_summary('label_hist/ground_truth', ground_truth) # Build a Graph that computes the logits predictions from the # inference model. print("before inference") print(images.get_shape()) logits, nr_params = cifar10.inference(images) print("nr_params: "+str(nr_params) ) print("after inference") # Calculate loss. loss = cifar10.loss(logits, labels) accuracy, precision, cat_accs = cifar10.accuracy(logits, ground_truth) # Build a Graph that trains the model with one batch of examples and # updates the model parameters. train_op = cifar10.train(loss, global_step) # Create a saver. saver = tf.train.Saver(tf.all_variables()) # tf.image_summary('images2', images) print (logits) # tf.image_summary('predictions', logits) # Build the summary operation based on the TF collection of Summaries. summary_op = tf.merge_all_summaries() # Build an initialization operation to run below. init = tf.initialize_all_variables() # Start running operations on the Graph. sess = tf.Session(config=tf.ConfigProto( log_device_placement=FLAGS.log_device_placement)) ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir) if ckpt and ckpt.model_checkpoint_path: # Restores from checkpoint saver.restore(sess, ckpt.model_checkpoint_path) # Assuming model_checkpoint_path looks something like: # /my-favorite-path/cifar10_train/model.ckpt-0, # extract global_step from it. global_step = int(ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]) else: print('No checkpoint file found') print('Initializing new model') sess.run(init) global_step = 0 # Start the queue runners. tf.train.start_queue_runners(sess=sess) summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph) for step in xrange(global_step, FLAGS.max_steps): start_time = time.time() _, loss_value, accuracy_value, precision_value, cat_accs_val = sess.run([train_op, loss, accuracy, precision, cat_accs]) duration = time.time() - start_time print (precision_value) print (cat_accs_val) assert not np.isnan(loss_value), 'Model diverged with loss = NaN' #precision_value = [0 if np.isnan(p) else p for p in precision_value] #print (precision_value) if step % 10 == 0: num_examples_per_step = FLAGS.batch_size examples_per_sec = num_examples_per_step / duration sec_per_batch = float(duration) format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f ' 'sec/batch)\n Accuracy = %.4f, mean average precision = %.4f') print (format_str % (datetime.now(), step, loss_value, examples_per_sec, sec_per_batch, accuracy_value, np.mean(precision_value))) if step % 100 == 0: summary_str = sess.run(summary_op) summary_writer.add_summary(summary_str, step) summary = tf.Summary() summary.value.add(tag='Accuracy (raw)', simple_value=float(accuracy_value)) for i,s in enumerate(CLASSES): summary.value.add(tag="precision/"+s+" (raw)",simple_value=float(precision_value[i])) summary.value.add(tag="accs/"+s+" (raw)",simple_value=float(cat_accs_val[i])) # summary.value.add(tag='Human precision (raw)', simple_value=float(precision_value)) summary_writer.add_summary(summary, step) print("hundred steps") # Save the model checkpoint periodically. 
if step % 1000 == 0 or (step + 1) == FLAGS.max_steps: print("thousand steps") checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt') saver.save(sess, checkpoint_path, global_step=step)
def train(model_fn, train_folder, qn_id):
  """Train CIFAR-10 for a number of steps."""
  with tf.Graph().as_default():
    # Get images and labels for CIFAR-10.
    # Force input pipeline to CPU:0 to avoid operations sometimes ending up on
    # GPU and resulting in a slow down.
    with tf.device('/cpu:0'):
      images, labels = cifar10.distorted_inputs()

    # Build a Graph that computes the logits predictions from the
    # inference model.
    logits = model_fn(images)

    # Calculate loss.
    loss = cifar10.loss(logits, labels)

    # Calculate accuracy.
    model_accuracy = cifar10.accuracy(logits, labels)

    # Build a Graph that trains the model with one batch of examples and
    # updates the model parameters.
    global_step = tf.train.get_or_create_global_step()
    train_op = cifar10.train(loss, model_accuracy, global_step)

    class _LoggerHook(tf.train.SessionRunHook):
      """Logs loss, accuracy and runtime."""

      def begin(self):
        self._start_time = time.time()

      def after_create_session(self, session, coord):
        # Resume the step count from the restored global step.
        self._step = session.run(global_step)

      def before_run(self, run_context):
        self._step += 1
        return tf.train.SessionRunArgs([loss, model_accuracy])  # Asks for loss and accuracy.

      def after_run(self, run_context, run_values):
        if self._step % FLAGS.log_frequency == 0:
          current_time = time.time()
          duration = current_time - self._start_time
          self._start_time = current_time

          loss_value = run_values.results[0]
          acc_value = run_values.results[1]
          examples_per_sec = FLAGS.log_frequency * FLAGS.batch_size / duration
          sec_per_batch = float(duration / FLAGS.log_frequency)

          format_str = ('%s - %s: step %d, loss = %.2f, acc = %.2f '
                        '(%.1f examples/sec; %.3f sec/batch)')
          print(format_str % (qn_id, datetime.now(), self._step, loss_value,
                              acc_value, examples_per_sec, sec_per_batch))

    class _StopAtHook(tf.train.SessionRunHook):
      """Stops training once the (restored) step count reaches last_step."""

      def __init__(self, last_step):
        self._last_step = last_step

      def after_create_session(self, session, coord):
        self._step = session.run(global_step)

      def before_run(self, run_context):  # pylint: disable=unused-argument
        self._step += 1
        return tf.train.SessionRunArgs(global_step)

      def after_run(self, run_context, run_values):
        if self._step >= self._last_step:
          run_context.request_stop()

    saver = tf.train.Saver()
    with tf.train.MonitoredTrainingSession(
        checkpoint_dir=train_folder,
        hooks=[_StopAtHook(last_step=FLAGS.max_steps),
               tf.train.NanTensorHook(loss),
               _LoggerHook()],
        config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement)) as mon_sess:
      # MonitoredTrainingSession already restores from train_folder; this
      # explicitly re-applies the latest checkpoint as a safeguard.
      latest_checkpoint_path = tf.train.latest_checkpoint(train_folder)
      if latest_checkpoint_path is not None:
        print("Restoring checkpoint from %s" % latest_checkpoint_path)
        saver.restore(mon_sess, latest_checkpoint_path)
      while not mon_sess.should_stop():
        mon_sess.run(train_op)
def train(): """Train CIFAR-10 for a number of steps.""" with tf.Graph().as_default(): global_step = tf.train.get_or_create_global_step() # Get images and labels for CIFAR-10. # Force input pipeline to CPU:0 to avoid operations sometimes ending up on # GPU and resulting in a slow down. with tf.device('/cpu:0'): train_images, train_labels = cifar10.distorted_inputs() val_images, val_labels = cifar10.distorted_inputs() test_images, test_labels = cifar10.inputs(eval_data=True) # Build a Graph that computes the logits predictions from the # inference model. train_logits = cifar10.inference(train_images) train_acc = cifar10.accuracy(train_labels, train_logits) # Calculate loss. loss = cifar10.loss(train_logits, train_labels) # validation #tf.get_variable_scope().reuse_variables() val_logits = cifar10.inference(val_images) val_acc = cifar10.accuracy(val_labels, val_logits) test_logits = cifar10.inference(test_images) test_acc = cifar10.accuracy(test_labels, test_logits) # Build a Graph that trains the model with one batch of examples and # updates the model parameters. train_op = cifar10.train(loss, global_step, train_acc, val_acc, test_acc) class _LoggerHook(tf.train.SessionRunHook): """Logs loss and runtime.""" def begin(self): self._step = -1 self._start_time = time.time() def before_run(self, run_context): self._step += 1 return tf.train.SessionRunArgs(loss) # Asks for loss value. def after_run(self, run_context, run_values): if self._step % FLAGS.log_frequency == 0: current_time = time.time() duration = current_time - self._start_time self._start_time = current_time loss_value = run_values.results examples_per_sec = FLAGS.log_frequency * FLAGS.batch_size / duration sec_per_batch = float(duration / FLAGS.log_frequency) format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f ' 'sec/batch)') print (format_str % (datetime.now(), self._step, loss_value, examples_per_sec, sec_per_batch)) with tf.train.MonitoredTrainingSession( checkpoint_dir=FLAGS.train_dir, hooks=[tf.train.StopAtStepHook(last_step=FLAGS.max_steps), tf.train.NanTensorHook(loss), _LoggerHook()], config=tf.ConfigProto( log_device_placement=FLAGS.log_device_placement)) as mon_sess: while not mon_sess.should_stop(): mon_sess.run(train_op)
def train_rnn(train_loader, test_loader, lr, momentum, report=20, cell='RNN',
              hidden=64, logdir='results/rnn'):
    writer = SummaryWriter(logdir)

    ntrain = 1000   # training samples per class
    ntest = 100     # test samples per class
    nclass = 10     # number of classes
    imsize = 28
    batchsize = 100
    nsamples = ntrain * nclass

    net = RNN(cell, hidden)
    net.cuda()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=lr, momentum=momentum)

    running_loss = 0.0
    # Hold one fixed test batch on the GPU for periodic evaluation.
    test_xs, test_ys = next(iter(test_loader))
    test_xs = test_xs.cuda()
    test_ys = test_ys.cuda()

    for epoch in range(1):
        for i, (batch_xs, batch_ys) in enumerate(train_loader):
            # Treat each 28x28 image as a length-28 sequence of 28-dim vectors.
            batch_xs = batch_xs.view(-1, imsize, imsize).cuda()
            batch_ys = batch_ys.cuda()

            # Zero the parameter gradients.
            optimizer.zero_grad()

            # Forward + backward + optimize.
            outputs = net(batch_xs)
            loss = criterion(outputs, batch_ys)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            if (i + 1) % report == 0:  # report every {report} batches
                step = epoch * nsamples + (i + 1) * batchsize
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / report))
                writer.add_scalar('training_loss', running_loss / report, step)
                running_loss = 0.0

                train_accuracy = accuracy(
                    net, batch_xs.view(-1, imsize, imsize).float(), batch_ys)
                print(f'train_accuracy:{train_accuracy:.3f}')
                writer.add_scalar('training_accuracy', train_accuracy, step)

                test_accuracy = accuracy(
                    net, test_xs.view(-1, imsize, imsize).float(), test_ys)
                print(f'test_accuracy:{test_accuracy:.3f}')
                writer.add_scalar('test_accuracy', test_accuracy, step)
    return net
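# A usage sketch for train_rnn above, assuming torchvision MNIST loaders and
# the RNN / accuracy helpers defined elsewhere in this file; the dataset
# subsetting implied by ntrain/ntest (1000/100 per class) is omitted here, and
# all names and paths are illustrative.
import torch
import torchvision
import torchvision.transforms as transforms

transform = transforms.ToTensor()
train_set = torchvision.datasets.MNIST('data', train=True, download=True,
                                       transform=transform)
test_set = torchvision.datasets.MNIST('data', train=False, download=True,
                                      transform=transform)
train_loader = torch.utils.data.DataLoader(train_set, batch_size=100,
                                           shuffle=True)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=100,
                                          shuffle=False)

net = train_rnn(train_loader, test_loader, lr=0.01, momentum=0.9,
                cell='LSTM', hidden=64, logdir='results/lstm')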
def train(): """Train CIFAR-10 for a number of steps.""" with tf.Graph().as_default(): global_step = tf.Variable(0, trainable=False) # Get images and labels for CIFAR-10. images, labels = cifar10.distorted_inputs() # avg = tf.reduce_mean(labels) # Build a Graph that computes the logits predictions from the # inference model. logits = cifar10.inference(images, dropout=0.8, reuse=False) # Calculate loss. loss = cifar10.loss(logits, labels) # calculate accuracy for training set acc = cifar10.accuracy(logits, labels) # Build a Graph that trains the model with one batch of examples and # updates the model parameters. train_op = cifar10.train(loss, global_step) # Setup cross validation right here # get a batch of xvalidation images # eval_data=False: use cross validation, not test test set val_images, val_labels = cifar10.inputs(eval_data=False) val_logits = cifar10.inference(val_images, dropout=1, reuse=True) top_k_op = tf.nn.in_top_k(val_logits, val_labels, 1) # Create a saver. saver = tf.train.Saver(tf.all_variables()) # Build the summary operation based on the TF collection of Summaries. summary_op = tf.merge_all_summaries() # Build an initialization operation to run below. init = tf.initialize_all_variables() # Start running operations on the Graph. sess = tf.Session(config=tf.ConfigProto( log_device_placement=FLAGS.log_device_placement)) sess.run(init) # Start the queue runners. tf.train.start_queue_runners(sess=sess) summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph) for step in xrange(FLAGS.max_steps): start_time = time.time() _, loss_value, acc_value = sess.run([train_op, loss, acc]) duration = time.time() - start_time assert not np.isnan(loss_value), 'Model diverged with loss = NaN' if step % 10 == 0: num_examples_per_step = FLAGS.batch_size examples_per_sec = num_examples_per_step / duration sec_per_batch = float(duration) format_str = ('%s: step %d, loss = %.2f, acc = %.2f (%.1f examples/sec; %.3f ' 'sec/batch)') print(format_str % (datetime.now(), step, loss_value, acc_value, examples_per_sec, sec_per_batch)) if step % 100 == 0: summary_str = sess.run(summary_op) summary_writer.add_summary(summary_str, step) # Save the model checkpoint periodically. if step % 100 == 0 or (step + 1) == FLAGS.max_steps: checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt') saver.save(sess, checkpoint_path, global_step=step) # run the xvalidation prediction = sess.run([top_k_op]) precision = np.sum(prediction) / FLAGS.batch_size print(("At step %d cross validation precision: %.3f") % (step, precision))
def train(): """Train CIFAR-10 for a number of steps.""" with tf.Graph().as_default(): global_step = tf.train.get_or_create_global_step() # Get images and labels for CIFAR-10. # Force input pipeline to CPU:0 to avoid operations sometimes ending up on # GPU and resulting in a slow down. with tf.device('/cpu:0'): images, labels = cifar10.distorted_inputs() # Build a Graph that computes the logits predictions from the # inference model. logits = cifar10.inference(images) accuracy = cifar10.accuracy(logits, labels) # Calculate loss. loss = cifar10.loss(logits, labels) # Build a Graph that trains the model with one batch of examples and # updates the model parameters. train_op = cifar10.train(loss, global_step) class _LoggerHook(tf.train.SessionRunHook): """Logs loss and runtime.""" def begin(self): self._step = -1 self._start_time = time.time() def before_run(self, run_context): self._step += 1 return tf.train.SessionRunArgs(loss) # Asks for loss value. def after_run(self, run_context, run_values): if self._step % FLAGS.log_frequency == 0: current_time = time.time() duration = current_time - self._start_time self._start_time = current_time loss_value = run_values.results examples_per_sec = FLAGS.log_frequency * FLAGS.batch_size / duration sec_per_batch = float(duration / FLAGS.log_frequency) format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f ' 'sec/batch)') print (format_str % (datetime.now(), self._step, loss_value, examples_per_sec, sec_per_batch)) mon_sess = tf.train.MonitoredTrainingSession( checkpoint_dir=FLAGS.train_dir, hooks=[tf.train.StopAtStepHook(last_step=FLAGS.max_steps), tf.train.NanTensorHook(loss), _LoggerHook()], save_checkpoint_steps=200, config=tf.ConfigProto( log_device_placement=FLAGS.log_device_placement)) # as mon_sess: cifar10.maybe_download_and_extract() if tf.gfile.Exists(FLAGS.eval_dir): tf.gfile.DeleteRecursively(FLAGS.eval_dir) tf.gfile.MakeDirs(FLAGS.eval_dir) """Validate CIFAR-10 for a number of steps""" with tf.Graph().as_default() as g: # Get images and labels for CIFAR-10. eval_data = FLAGS.eval_data == 'test' images, labels = cifar10.inputs(eval_data=eval_data, valData=True) print(images.shape) # Build a Graph that computes the logits predictions from the # inference model. logits = cifar10.inference(images) # Calculate predictions. top_k_op = tf.nn.in_top_k(logits, labels, 1) # Restore the moving average version of the learned variables for eval. variable_averages = tf.train.ExponentialMovingAverage( cifar10.MOVING_AVERAGE_DECAY) variables_to_restore = variable_averages.variables_to_restore() saver = tf.train.Saver(variables_to_restore) # Build the summary operation based on the TF collection of Summaries. summary_op = tf.summary.merge_all() summary_writer = tf.summary.FileWriter(FLAGS.eval_dir, g) #sess = tf.Session(graph=g) #Training Loop: Evaluate Validation accuracy after 390 steps almost an epoch for i in range(FLAGS.max_steps): if i % 390 == 0: eval_once(saver, summary_writer, top_k_op, summary_op) mon_sess.run(train_op)