def evaluate():
    """Evaluate the VGG16 model on the validation split and print accuracy.

    Builds a fresh graph, restores the moving-average versions of the
    variables from the latest checkpoint recorded in ``checkpoint_dir`` and
    prints the top-1 / top-3 accuracy returned by ``get_accuracy``.

    Raises:
        ValueError: if no checkpoint state is found in ``checkpoint_dir``.
    """
    # The dataset builder takes both splits, so list train and validation.
    train_files, train_targets = scene_input.list_images('train')
    val_files, val_targets = scene_input.list_images('validation')

    with tf.Graph().as_default():
        keep_prob = tf.placeholder(tf.float32)
        dataset = scene_input.get_dataset(
            train_files, train_targets, val_files, val_targets, batch_size)
        images, labels, _, val_data_init_op = dataset

        with tf.name_scope('inference'):
            net = vgg.Vgg16()
            net.build(images, keep_prob, scene_input.num_classes)
            logits = net.get_softmax_linear()

        # Per-example hit indicators for top-1 and top-3.
        top_1_op = tf.nn.in_top_k(logits, labels, 1)
        top_3_op = tf.nn.in_top_k(logits, labels, 3)

        # Evaluate with the moving-average version of the learned variables.
        ema = tf.train.ExponentialMovingAverage(scene.MOVING_AVERAGE_DECAY)
        saver = tf.train.Saver(ema.variables_to_restore())

        with tf.Session() as sess:
            state = tf.train.get_checkpoint_state(checkpoint_dir)
            if not (state and state.model_checkpoint_path):
                raise ValueError(
                    "Cannot find checkpoint data in {}".format(checkpoint_dir))
            saver.restore(sess, state.model_checkpoint_path)

            # Accuracy on the validation set.
            acc_top1, acc_top3 = get_accuracy(
                sess, top_1_op, top_3_op, val_data_init_op, keep_prob)
            print('Val: accuracy (top1){0:.4f} (top3){1:.4f}'.format(
                acc_top1, acc_top3))
def evaluate_10crop_3():
    """Evaluate VGG16 on the validation split using the 10-crop dataset.

    The softmax output is reduced with ``tf.reduce_max`` over axis 0, then
    reshaped to ``[-1, num_classes]`` and scored with top-1 / top-3
    ``in_top_k``. Moving-average variables are restored from the latest
    checkpoint recorded in ``checkpoint_dir``.

    Raises:
        ValueError: if no checkpoint state is found in ``checkpoint_dir``.
    """
    val_files, val_targets = scene_input.list_images('validation')

    shape_report = "prob_10crop.shape: {}"
    label_report = "label.shape: {}"

    with tf.Graph().as_default():
        keep_prob = tf.placeholder(tf.float32)
        crops, label, val_data_init_op = scene_input.get_dataset_10crop_eval(
            val_files, val_targets)

        with tf.name_scope('inference'):
            net = vgg.Vgg16()
            net.build(crops, keep_prob, scene_input.num_classes)
            prob_10crop = tf.reduce_max(net.get_softmax(), axis=0)

        # Report static shapes before and after flattening.
        print(shape_report.format(prob_10crop.shape))
        print(label_report.format(label.shape))
        prob_10crop = tf.reshape(prob_10crop, [-1, scene_input.num_classes])
        label = tf.reshape(label, [-1])
        print(shape_report.format(prob_10crop.shape))
        print(label_report.format(label.shape))

        top_1_op = tf.nn.in_top_k(prob_10crop, label, 1)
        top_3_op = tf.nn.in_top_k(prob_10crop, label, 3)

        # Evaluate with the moving-average version of the learned variables.
        ema = tf.train.ExponentialMovingAverage(scene.MOVING_AVERAGE_DECAY)
        saver = tf.train.Saver(ema.variables_to_restore())

        with tf.Session() as sess:
            state = tf.train.get_checkpoint_state(checkpoint_dir)
            if not (state and state.model_checkpoint_path):
                raise ValueError(
                    "Cannot find checkpoint data in {}".format(checkpoint_dir))
            saver.restore(sess, state.model_checkpoint_path)

            # Accuracy on the validation set.
            acc_top1, acc_top3 = get_accuracy_10crop(
                sess, top_1_op, top_3_op, val_data_init_op, keep_prob)
            print('Val: accuracy (top1){0:.4f} (top3){1:.4f}'.format(
                acc_top1, acc_top3))
def main_resnet():
    """Fine-tune the ResNet scene classifier and log per-epoch metrics.

    Steps:
      1. Build the graph (input pipeline, inference, loss, train op and
         top-1 / top-3 correctness ops).
      2. Load the pretrained weights returned by
         ``resnet.get_variables_except_fc()`` from ``model_dir``.
      3. Optionally resume from a checkpoint in ``checkpoint_dir``.
      4. Train for ``num_epochs`` epochs, saving a checkpoint every
         ``checkpoint_freq`` epochs and appending one metrics row per epoch
         to ``acc_loss_path``.

    Raises:
        ValueError: if the pretrained model file does not exist.
    """
    # File names and labels for the training and validation splits.
    train_filenames, train_labels = scene_input.list_images('train')
    val_filenames, val_labels = scene_input.list_images('validation')

    # ----------------------------------------------------------------------
    # Graph definition: any tensor created inside `graph.as_default()`
    # belongs to `graph`.
    graph = tf.Graph()
    with graph.as_default():
        global_step = tf.contrib.framework.get_or_create_global_step()
        is_training = tf.placeholder(tf.bool, [], "is_training")
        dropout_keep_prob = tf.placeholder(tf.float32, [], "dropout_keep_prob")
        images, labels, train_data_init_op, val_data_init_op = (
            scene_input.get_dataset_with_random_scale(
                train_filenames, train_labels, val_filenames, val_labels,
                batch_size))

        if not use_minimal_summary:
            # `tf.image_summary` was removed in TF 1.0; the rest of this file
            # already uses the `tf.summary.*` API.
            tf.summary.image('images', images)

        with tf.name_scope('inference'):
            logits = inference_resnet(
                images, is_training, resnet_layer, dropout_keep_prob)

        with tf.name_scope('loss'):
            loss_ = loss_resnet(logits, labels)

        with tf.name_scope('train'):
            full_train_op = train_resnet(loss_, global_step)

        with tf.name_scope('evaluation'):
            prediction = tf.to_int32(tf.argmax(logits, 1))
            labels = tf.to_int32(labels)
            correct_prediction = tf.equal(prediction, labels)
            correct_prediction_top3 = tf.nn.in_top_k(logits, labels, 3)

        init_var_op = tf.global_variables_initializer()
        merged_summary = tf.summary.merge_all()
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=100)

    # `merge_all()` returns None when no summary op was registered; running
    # None as a fetch would fail, so only write summaries when there are any.
    write_summaries = merged_summary is not None and tensorboard_write_frq > 0

    # ----------------------------------------------------------------------
    # Session: the interface used to *run* the graph built above.
    print("writing acc&loss info to {}".format(acc_loss_path))
    with tf.Session(graph=graph) as sess, open(acc_loss_path, "w") as f:
        sess.run(init_var_op)

        # Tensorboard writer; closed in `finally` so pending events are
        # flushed even when training is interrupted.
        writer = tf.summary.FileWriter(log_dir, graph)
        try:
            # Restore the pretrained model (everything except the fc layers,
            # as selected by `resnet.get_variables_except_fc()`).
            variables_to_restore = resnet.get_variables_except_fc()
            pretrained_model_saver = tf.train.Saver(variables_to_restore)
            pretrained_model_path = os.path.join(
                model_dir, resnet.checkpoint_fn(resnet_layer))
            if not os.path.exists(pretrained_model_path):
                raise ValueError(
                    "Cannot find pretrained model: {}".format(
                        pretrained_model_path))
            print("Loading pretrained model from :{}".format(
                pretrained_model_path))
            pretrained_model_saver.restore(sess, pretrained_model_path)

            # Resume from a checkpoint: either a designated epoch, or (when
            # `restore_epoch` is None) the latest one in `checkpoint_dir`.
            restore_epoch = 13  # designate a specific epoch to restore or None.
            ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
            if restore_epoch is not None:
                restore_path = os.path.join(
                    checkpoint_dir, "scene-{}".format(restore_epoch))
            elif ckpt and ckpt.model_checkpoint_path:
                restore_path = ckpt.model_checkpoint_path
                # Checkpoints are saved as "scene-<epoch>".
                restore_epoch = int(
                    restore_path.split('/')[-1].split('-')[1])
            else:
                restore_path = None

            if restore_path is None:
                print('No checkpoint found.')
            else:
                saver.restore(sess, restore_path)
                # NOTE(review): the loop below skips `restore_epoch` epochs,
                # so `restore_epoch * steps_per_epoch` may be the intended
                # step; kept as-is pending confirmation.
                restore_step = (restore_epoch - 1) * steps_per_epoch
                sess.run(global_step.assign(restore_step))
                print("restore check point from: {}".format(restore_path))
                print("get epoch: {} step: {}".format(
                    restore_epoch, restore_step))

            # Train the entire model, continuing with the *same* weights.
            f.write("sampled train acc, sampled train loss, val acc top1, val acc top3\n")
            # `keep_prob` here is the module-level dropout setting fed into
            # the `dropout_keep_prob` placeholder.
            train_feed = {is_training: True, dropout_keep_prob: keep_prob}
            for epoch in range(num_epochs):
                # Skip epochs already covered by the restored checkpoint.
                if restore_epoch is not None and epoch < restore_epoch:
                    continue

                print('epoch {} / {}'.format(epoch + 1, num_epochs))
                sess.run(train_data_init_op)
                for i in tqdm.tqdm(range(steps_per_epoch)):
                    try:
                        if write_summaries and i % tensorboard_write_frq == 0:
                            _, summary = sess.run(
                                [full_train_op, merged_summary], train_feed)
                            writer.add_summary(
                                summary, epoch * steps_per_epoch + i)
                        else:
                            sess.run(full_train_op, train_feed)
                    except tf.errors.OutOfRangeError:
                        # The input pipeline ran out before `steps_per_epoch`.
                        break

                # Checkpoint.
                if (epoch + 1) % checkpoint_freq == 0:
                    saver.save(sess, os.path.join(checkpoint_dir, 'scene'),
                               global_step=epoch + 1)

                # Check on the train and val sets every epoch.
                train_acc, train_loss = check_train(
                    sess, correct_prediction, is_training, train_data_init_op,
                    dropout_keep_prob=dropout_keep_prob,
                    n_batch=int(5000/batch_size), loss=loss_)
                print('Train: accuracy {0:.4f} loss {1:.4f}'.format(
                    train_acc, train_loss))
                val_acc, val_acc_top3 = check_val(
                    sess, correct_prediction, is_training, val_data_init_op,
                    dropout_keep_prob=dropout_keep_prob,
                    correct_prediction_top3=correct_prediction_top3)
                print('Val: accuracy (top1){0:.4f} (top3){1:.4f}'.format(
                    val_acc, val_acc_top3))
                f.write("{epoch} {0:.4f} {1:.4f} {2:.4f} {3:.4f}\n".format(
                    train_acc, train_loss, val_acc, val_acc_top3,
                    epoch=epoch + 1))
        finally:
            writer.close()
def evaluate_class_level():
    """Write per-class train/val accuracy for several epochs (disabled).

    The function currently raises ``NotImplementedError("dropout")`` as its
    first statement, so nothing below the ``raise`` executes. The body is
    kept for when it is re-enabled.

    NOTE(review): the ``inference_resnet`` call below does not pass
    ``dropout_keep_prob`` while ``evaluate_10crop_resnet`` does; presumably
    that is what "dropout" refers to -- confirm before re-enabling.

    Raises:
        NotImplementedError: always.
        ValueError: (once re-enabled) if an epoch's checkpoint is missing.
    """
    raise NotImplementedError("dropout")

    val_filenames, val_labels = scene_input.list_images('validation')
    train_filenames, train_labels = scene_input.list_images('train')

    with tf.Graph().as_default():
        is_training = tf.placeholder(tf.bool, [], "is_training")
        images_10crop_batched, label, train_data_init_op, val_data_init_op = (
            scene_input.get_dataset_10crop_train_eval(
                train_filenames, train_labels, val_filenames, val_labels))

        with tf.name_scope('inference'):
            logits = scene.inference_resnet(
                images_10crop_batched, is_training, layer=c.resnet_layer)
            prob_10crop = tf.reduce_mean(logits, axis=0)

        # Calculate predictions.
        print("prob_10crop.shape: {}".format(prob_10crop.shape))
        print("label.shape: {}".format(label.shape))
        prob_10crop = tf.reshape(prob_10crop, [-1, c.num_classes])
        label = tf.reshape(label, [-1])
        print("prob_10crop.shape: {}".format(prob_10crop.shape))
        print("label.shape: {}".format(label.shape))
        top_1_op = tf.nn.in_top_k(prob_10crop, label, 1)
        top_3_op = tf.nn.in_top_k(prob_10crop, label, 3)

        # Restore the moving average version of the learned variables.
        variable_averages = tf.train.ExponentialMovingAverage(
            scene.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        saver_with_moving_average = tf.train.Saver(variables_to_restore)

        print("saving class level accuracy to {}".format(class_level_acc_file))
        with tf.Session() as sess, open(class_level_acc_file, "a") as f:
            f.write("class level acc\n")
            f.write("evaluation time: {}\n".format(time.time()))
            f.write(
                "each 5 row: epoch, train_top1, train_top3, val_top1, val_top3\n"
            )
            f.write("class index: \n")
            f.write("".join(
                "{:>8}".format(i) for i in range(c.num_classes)) + "\n")

            list_enumerate_epoch = [12, 13, 14]
            for n_epoch in list_enumerate_epoch:
                # Always use this epoch's own checkpoint. Previously the
                # latest checkpoint from the state file overrode this path,
                # so every iteration evaluated the same weights.
                restore_checkpoint_path = os.path.join(
                    checkpoint_dir, "scene-{}".format(n_epoch))
                if not tf.train.checkpoint_exists(restore_checkpoint_path):
                    raise ValueError(
                        "Cannot find checkpoint file: {} or in dir: {}".format(
                            restore_checkpoint_path, checkpoint_dir))

                print("restore checkpoint from file: {}".format(
                    restore_checkpoint_path))
                print("restore global variables with moving average.")
                saver_with_moving_average.restore(sess, restore_checkpoint_path)

                # Per-class accuracy on the train and validation sets.
                train_top1, train_top3, val_top1, val_top3 = (
                    get_class_level_acc_10crop_train_val(
                        sess=sess,
                        top_1_op=top_1_op,
                        top_3_op=top_3_op,
                        label=label,
                        train_data_init_op=train_data_init_op,
                        val_data_init_op=val_data_init_op,
                        is_training=is_training))

                # One header row with the epoch, then one row per metric.
                rows = ["{}\n".format(n_epoch)]
                for per_class_acc in (train_top1, train_top3,
                                      val_top1, val_top3):
                    rows.append("".join(
                        "{0:>8.4f}".format(per_class_acc[i])
                        for i in range(c.num_classes)) + "\n")
                info = "".join(rows)
                print(info)
                f.write(info)
def evaluate_10crop_resnet_with_check():
    """10-crop ResNet validation that records top-3 misses (disabled).

    The function raises ``NotImplementedError("dropout")`` as its first
    statement, so the body below never runs in its current state. When
    enabled, it evaluates one example per step and writes one line per
    example that is not in the top 3 to ``wrong_image_info_file``: file
    name, the three highest-scoring class indices (best first) and the label.

    Raises:
        NotImplementedError: always.
    """
    raise NotImplementedError("dropout")

    val_files, val_targets = scene_input.list_images('validation')

    with tf.Graph().as_default():
        is_training = tf.placeholder(tf.bool, [], "is_training")
        crops, label, filename, val_data_init_op = (
            scene_input.get_dataset_10crop_eval_with_filename(
                val_files, val_targets))

        with tf.name_scope('inference'):
            logits = scene.inference_resnet(
                crops,
                is_training,
                layer=c.resnet_layer,
                dropout_keep_prob=dropout_keep_prob)
            prob_10crop = tf.reduce_mean(logits, axis=0)

        # Calculate predictions.
        prob_10crop = tf.reshape(prob_10crop, [-1, scene_input.num_classes])
        label = tf.reshape(label, [-1])
        top_1_op = tf.nn.in_top_k(prob_10crop, label, 1)
        top_3_op = tf.nn.in_top_k(prob_10crop, label, 3)

        # Evaluate with the moving-average version of the learned variables.
        ema = tf.train.ExponentialMovingAverage(scene.MOVING_AVERAGE_DECAY)
        saver_with_moving_average = tf.train.Saver(ema.variables_to_restore())

        with tf.Session() as sess:
            restore_epoch = "23"
            state = tf.train.get_checkpoint_state(checkpoint_dir)
            epoch_path = os.path.join(checkpoint_dir, "scene-" + restore_epoch)
            if state and state.model_checkpoint_path:
                # The latest recorded checkpoint wins over the epoch path.
                checkpoint_path = state.model_checkpoint_path
            else:
                checkpoint_path = epoch_path
                if not tf.train.checkpoint_exists(checkpoint_path):
                    raise ValueError(
                        "Cannot find checkpoint file: {} or in dir: {}".format(
                            checkpoint_path, checkpoint_dir))

            print("restore checkpoint from file: {}".format(checkpoint_path))
            print("restore global variables with moving average.")
            saver_with_moving_average.restore(sess, checkpoint_path)

            # Accuracy on the validation set.
            sess.run(val_data_init_op)
            hits_top1 = 0
            hits_top3 = 0
            fetches = [top_1_op, top_3_op, filename, prob_10crop, label]
            print(
                "writing wrong image info to {}".format(wrong_image_info_file))
            with open(wrong_image_info_file, "w") as report:
                for _ in tqdm.tqdm(range(num_examples_per_epoch_for_val)):
                    try:
                        in_top1, in_top3, name, probs, truth = sess.run(
                            fetches, {is_training: False})
                    except tf.errors.OutOfRangeError:
                        break

                    hits_top1 += in_top1.sum()
                    hits_top3 += in_top3.sum()
                    if in_top3.sum() != 0:
                        continue

                    # Missed even in the top 3: log file name, the three
                    # best class indices (best first) and the true label.
                    best_first = np.argsort(probs[0])[-3:][::-1]
                    report.write("{} {} {} {} {}\n".format(
                        name, best_first[0], best_first[1], best_first[2],
                        truth[0]))

            # Fraction of data points that were correctly classified.
            acc = float(hits_top1) / num_examples_per_epoch_for_val
            acc_top3 = float(hits_top3) / num_examples_per_epoch_for_val
            print('Val: accuracy (top1){0:.4f} (top3){1:.4f}'.format(
                acc, acc_top3))
def evaluate_90crop_resnet():
    """90-crop ResNet validation over a range of epochs (disabled).

    The function currently raises ``NotImplementedError("dropout")`` as its
    first statement, so nothing below the ``raise`` executes. The body is
    kept for when it is re-enabled.

    NOTE(review): the ``inference_resnet`` call below does not pass
    ``dropout_keep_prob`` while ``evaluate_10crop_resnet`` does; presumably
    that is what "dropout" refers to -- confirm before re-enabling.

    Raises:
        NotImplementedError: always.
        ValueError: (once re-enabled) if an epoch's checkpoint is missing.
    """
    raise NotImplementedError("dropout")

    val_filenames, val_labels = scene_input.list_images('validation')

    with tf.Graph().as_default():
        is_training = tf.placeholder(tf.bool, [], "is_training")
        images_90crop_batched, label, val_data_init_op = (
            scene_input.get_dataset_90crop_eval(val_filenames, val_labels))
        print("images_90crop_batched.shape: {}".format(
            images_90crop_batched.shape))

        with tf.name_scope('inference'):
            logits = scene.inference_resnet(
                images_90crop_batched, is_training, layer=c.resnet_layer)
            prob_90crop = tf.reduce_mean(logits, axis=0)

        # Calculate predictions.
        print("prob_90crop.shape: {}".format(prob_90crop.shape))
        print("label.shape: {}".format(label.shape))
        prob_90crop = tf.reshape(prob_90crop, [-1, c.num_classes])
        label = tf.reshape(label, [-1])
        print("prob_90crop.shape: {}".format(prob_90crop.shape))
        print("label.shape: {}".format(label.shape))
        top_1_op = tf.nn.in_top_k(prob_90crop, label, 1)
        top_3_op = tf.nn.in_top_k(prob_90crop, label, 3)

        # Restore the moving average version of the learned variables.
        variable_averages = tf.train.ExponentialMovingAverage(
            scene.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        saver_with_moving_average = tf.train.Saver(variables_to_restore)

        with tf.Session() as sess:
            list_acc_top1_moving_average = []
            list_acc_top3_moving_average = []
            list_enumerate_epoch = range(10, 20, 1)
            for n_epoch in list_enumerate_epoch:
                # Always use this epoch's own checkpoint. Previously the
                # latest checkpoint from the state file overrode this path,
                # so every iteration evaluated the same weights.
                restore_checkpoint_path = os.path.join(
                    checkpoint_dir, "scene-{}".format(n_epoch))
                if not tf.train.checkpoint_exists(restore_checkpoint_path):
                    raise ValueError(
                        "Cannot find checkpoint file: {} or in dir: {}".format(
                            restore_checkpoint_path, checkpoint_dir))

                print("restore checkpoint from file: {}".format(
                    restore_checkpoint_path))
                print("restore global variables with moving average.")
                saver_with_moving_average.restore(sess, restore_checkpoint_path)

                # Accuracy on the validation set.
                acc_top1, acc_top3 = get_accuracy_resnet(
                    sess, top_1_op, top_3_op, val_data_init_op, is_training)
                print('Val: accuracy (top1){0:.4f} (top3){1:.4f}'.format(
                    acc_top1, acc_top3))
                list_acc_top1_moving_average.append(acc_top1)
                list_acc_top3_moving_average.append(acc_top3)

            # Summary table: epoch, top-1, top-3.
            for epoch, top1, top3 in zip(list_enumerate_epoch,
                                         list_acc_top1_moving_average,
                                         list_acc_top3_moving_average):
                print("{0:} {1:.4f} {2:.4f}".format(epoch, top1, top3))
def evaluate_10crop_resnet():
    """Evaluate 10-crop ResNet validation accuracy for epochs 1 through 30.

    For each epoch's checkpoint ``<checkpoint_dir>/scene-<epoch>`` the
    validation accuracy is measured twice: once with the variables restored
    under their own names, and once with the moving-average mapping from
    ``ExponentialMovingAverage.variables_to_restore()``. A summary table
    (epoch, top-1, top-3, top-1 with averages, top-3 with averages) is
    printed at the end.

    Raises:
        ValueError: if the checkpoint for any epoch does not exist.
    """
    val_filenames, val_labels = scene_input.list_images('validation')

    with tf.Graph().as_default():
        is_training = tf.placeholder(tf.bool, [], "is_training")
        dropout_keep_prob = tf.placeholder(tf.float32, [], "dropout_keep_prob")
        images_10crop_batched, label, val_data_init_op = (
            scene_input.get_dataset_10crop_eval(val_filenames, val_labels))

        with tf.name_scope('inference'):
            logits = scene.inference_resnet(
                images_10crop_batched,
                is_training,
                layer=c.resnet_layer,
                dropout_keep_prob=dropout_keep_prob)
            prob_10crop = tf.reduce_mean(logits, axis=0)

        # Calculate predictions.
        prob_10crop = tf.reshape(prob_10crop, [-1, c.num_classes])
        label = tf.reshape(label, [-1])
        top_1_op = tf.nn.in_top_k(prob_10crop, label, 1)
        top_3_op = tf.nn.in_top_k(prob_10crop, label, 3)

        # Two savers: one restoring the moving-average version of the
        # variables, one restoring the variables under their own names.
        variable_averages = tf.train.ExponentialMovingAverage(
            scene.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        saver_with_moving_average = tf.train.Saver(variables_to_restore)
        saver = tf.train.Saver(tf.global_variables())

        with tf.Session() as sess:
            list_acc_top1_moving_average = []
            list_acc_top3_moving_average = []
            list_acc_top1, list_acc_top3 = [], []
            list_enumerate_epoch = range(1, 31, 1)
            for n_epoch in list_enumerate_epoch:
                # Each epoch is evaluated from its own checkpoint; the latest
                # checkpoint state is deliberately not consulted.
                restore_checkpoint_path = os.path.join(
                    checkpoint_dir, "scene-{}".format(n_epoch))
                if not tf.train.checkpoint_exists(restore_checkpoint_path):
                    raise ValueError(
                        "Cannot find checkpoint file: {} or in dir: {}".format(
                            restore_checkpoint_path, checkpoint_dir))

                print("restore checkpoint from file: {}".format(
                    restore_checkpoint_path))

                # Pass 1: variables restored under their own names.
                print("restore global variables.")
                saver.restore(sess, restore_checkpoint_path)
                acc_top1, acc_top3 = get_accuracy_resnet(
                    sess, top_1_op, top_3_op, val_data_init_op, is_training,
                    dropout_keep_prob)
                print('Val: accuracy (top1){0:.4f} (top3){1:.4f}'.format(
                    acc_top1, acc_top3))
                list_acc_top1.append(acc_top1)
                list_acc_top3.append(acc_top3)

                # Pass 2: moving-average version of the variables.
                print("restore global variables with moving average.")
                saver_with_moving_average.restore(sess, restore_checkpoint_path)
                acc_top1, acc_top3 = get_accuracy_resnet(
                    sess, top_1_op, top_3_op, val_data_init_op, is_training,
                    dropout_keep_prob)
                print('Val: accuracy (top1){0:.4f} (top3){1:.4f}'.format(
                    acc_top1, acc_top3))
                list_acc_top1_moving_average.append(acc_top1)
                list_acc_top3_moving_average.append(acc_top3)

            # Summary table, one row per evaluated epoch.
            for row in zip(list_enumerate_epoch,
                           list_acc_top1, list_acc_top3,
                           list_acc_top1_moving_average,
                           list_acc_top3_moving_average):
                print("{0} {1:.4f} {2:.4f} {3:.4f} {4:.4f}".format(*row))
def main():
    """Train the VGG16 scene classifier, optionally resuming a checkpoint.

    Builds the graph (colour-augmented input pipeline, VGG16 inference,
    sparse softmax cross-entropy loss, momentum-SGD train op, top-1 / top-3
    correctness ops), finalizes it, then trains for ``num_epochs`` epochs.
    A checkpoint is written to ``checkpoint_dir`` every ``checkpoint_freq``
    epochs, and train / validation accuracy is printed after each epoch.

    NOTE(review): checkpoints are looked up in ``model_dir`` but saved to
    ``checkpoint_dir``; ``main_resnet`` resumes from ``checkpoint_dir``.
    Confirm which directory is intended here.
    NOTE(review): ``check_train`` / ``check_val`` are called with
    ``keep_prob`` in the position where ``main_resnet`` passes
    ``is_training``; verify against the current helper signatures.
    """
    # File names and labels for the training and validation splits.
    train_filenames, train_labels = scene_input.list_images('train')
    val_filenames, val_labels = scene_input.list_images('validation')

    # ----------------------------------------------------------------------
    # Graph definition: any tensor created inside `graph.as_default()`
    # belongs to `graph`.
    graph = tf.Graph()
    with graph.as_default():
        global_step = tf.contrib.framework.get_or_create_global_step()
        keep_prob = tf.placeholder(tf.float32)
        images, labels, train_data_init_op, val_data_init_op = (
            scene_input.get_dataset_with_color_augmentation(
                train_filenames, train_labels, val_filenames, val_labels,
                batch_size))

        with tf.name_scope('inference'):
            conv_net = vgg.Vgg16()
            conv_net.build(images, keep_prob, scene_input.num_classes)
            logits = conv_net.get_softmax_linear()

        with tf.name_scope('loss'):
            # Average cross entropy loss across the batch.
            labels = tf.cast(labels, tf.int64)
            cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=labels, logits=logits,
                name='cross_entropy_per_example')
            loss = tf.reduce_mean(cross_entropy, name='cross_entropy_loss')

        with tf.name_scope('train'):
            full_train_op = train_momentum_sgd(loss, global_step)

        with tf.name_scope('evaluation'):
            prediction = tf.to_int32(tf.argmax(logits, 1))
            labels = tf.to_int32(labels)
            correct_prediction = tf.equal(prediction, labels)
            correct_prediction_top3 = tf.nn.in_top_k(logits, labels, 3)

        # The op that resets the global step must exist before the graph is
        # finalized: creating `global_step.assign(...)` later would raise
        # "Graph is finalized and cannot be modified".
        restore_step_input = tf.placeholder(
            global_step.dtype.base_dtype, [], "restore_step")
        set_global_step_op = global_step.assign(restore_step_input)

        init_var_op = tf.global_variables_initializer()
        merged_summary = tf.summary.merge_all()
        saver = tf.train.Saver()
        graph.finalize()

    # `merge_all()` returns None when no summary op was registered; running
    # None as a fetch would fail, so only write summaries when there are any.
    write_summaries = merged_summary is not None and tensorboard_write_frq > 0

    # ----------------------------------------------------------------------
    # Session: the interface used to *run* the graph built above.
    with tf.Session(graph=graph) as sess:
        sess.run(init_var_op)

        # Tensorboard writer; closed in `finally` so pending events are
        # flushed even when training is interrupted.
        writer = tf.summary.FileWriter(log_dir, graph)
        try:
            # Check model_dir for a checkpoint file.
            restore_epoch = None
            ckpt = tf.train.get_checkpoint_state(model_dir)
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
                # Checkpoints are saved as "<prefix>-<epoch>"; take the text
                # after the last dash and convert it so the arithmetic and
                # comparisons below operate on an int.
                checkpoint_name = ckpt.model_checkpoint_path.split('/')[-1]
                restore_epoch = int(checkpoint_name.split('-')[-1])
                restore_step = (restore_epoch - 1) * steps_per_epoch
                sess.run(set_global_step_op,
                         {restore_step_input: restore_step})
                print("restore check point from: {}".format(
                    ckpt.model_checkpoint_path))
                print("get epoch: {} step: {}".format(
                    restore_epoch, restore_step))
            else:
                print('training whole conv net from scratch.')

            start_time = time.time()
            # `dropout_keep_prob` here is the module-level dropout setting
            # fed into the `keep_prob` placeholder.
            train_feed = {keep_prob: dropout_keep_prob}
            for epoch in range(num_epochs):
                # Skip epochs already covered by the restored checkpoint.
                if restore_epoch is not None and epoch < restore_epoch:
                    continue

                print('epoch {} / {}'.format(epoch + 1, num_epochs))
                tick = time.time()
                sess.run(train_data_init_op)
                for i in tqdm.tqdm(range(steps_per_epoch)):
                    try:
                        if write_summaries and i % tensorboard_write_frq == 0:
                            _, summary = sess.run(
                                [full_train_op, merged_summary], train_feed)
                            writer.add_summary(
                                summary, epoch * steps_per_epoch + i)
                        else:
                            sess.run(full_train_op, train_feed)
                    except tf.errors.OutOfRangeError:
                        # The input pipeline ran out before `steps_per_epoch`.
                        break
                tock = time.time()
                print(check_time(tock - start_time, steps_per_epoch,
                                 tock - tick))

                # Checkpoint.
                if (epoch + 1) % checkpoint_freq == 0:
                    saver.save(sess, os.path.join(checkpoint_dir, 'scene'),
                               global_step=epoch + 1)

                # Check on the train and val sets every epoch.
                train_acc, train_loss = check_train(
                    sess, correct_prediction, keep_prob, train_data_init_op,
                    n_batch=100, loss=loss)
                print('Train: accuracy {0:.4f} loss {1:.4f}'.format(
                    train_acc, train_loss))
                val_acc, val_acc_top3 = check_val(
                    sess, correct_prediction, keep_prob, val_data_init_op,
                    correct_prediction_top3=correct_prediction_top3)
                print('Val: accuracy (top1){0:.4f} (top3){1:.4f}'.format(
                    val_acc, val_acc_top3))
        finally:
            writer.close()