import logging
import os

import numpy as np
import tensorflow as tf

# `train_sess`, `evaluate_sess` and `save_dict_to_json` are project-level
# helpers assumed to be importable from the rest of this template.


def evaluate(model_spec, model_dir, params, restore_from):
    """Evaluate the model

    Args:
        model_spec: (dict) contains the graph operations or nodes needed for evaluation
        model_dir: (string) directory containing config, weights and log
        params: (Params) contains hyperparameters of the model.
                Must define: batch_size, eval_size
        restore_from: (string) directory or file containing weights to restore the graph
    """
    # Initialize tf.Saver
    saver = tf.train.Saver()

    with tf.Session() as sess:
        # Initialize the model variables
        sess.run(model_spec['variable_init_op'])

        # Reload weights from the weights subdirectory
        save_path = os.path.join(model_dir, restore_from)
        if os.path.isdir(save_path):
            save_path = tf.train.latest_checkpoint(save_path)
        saver.restore(sess, save_path)

        # Evaluate: number of batches needed to cover the eval set (ceiling division)
        num_steps = (params.eval_size + params.batch_size - 1) // params.batch_size
        metrics = evaluate_sess(sess, model_spec, num_steps)
        metrics_name = '_'.join(restore_from.split('/'))
        save_path = os.path.join(model_dir, restore_from,
                                 "metrics_test_{}.json".format(metrics_name))
        save_dict_to_json(metrics, save_path)

        # Re-initialize the iterator and print per-example predictions
        sess.run(model_spec['iterator_init_op'])
        for _ in range(num_steps):
            labels, probabilities, predictions = sess.run([
                model_spec['labels'], model_spec['probabilities'],
                model_spec['predictions']
            ])
            # Probability assigned to the predicted class for each example
            probab = np.max(probabilities, axis=1)
            for i in range(len(labels)):
                print(f'Label is: {labels[i]} and prediction is {predictions[i]} '
                      f'with probability {probab[i]}')
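# A minimal usage sketch for `evaluate` (illustrative only). The experiment
# directory, the `Params` loader and the `input_fn`/`model_fn` builders named
# below are assumptions about the surrounding template, not part of this module:
#
#     params = Params(os.path.join('experiments/base_model', 'params.json'))
#     inputs = input_fn(is_training=False, filenames=test_filenames,
#                       labels=test_labels, params=params)
#     model_spec = model_fn('eval', inputs, params, reuse=False)
#     evaluate(model_spec, 'experiments/base_model', params,
#              restore_from='best_weights')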
def train_and_evaluate(train_model_spec, eval_model_spec, model_dir, params,
                       restore_from=None):
    """Train the model and evaluate every epoch.

    Args:
        train_model_spec: (dict) contains the graph operations or nodes needed for training
        eval_model_spec: (dict) contains the graph operations or nodes needed for evaluation
        model_dir: (string) directory containing config, weights and log
        params: (Params) contains hyperparameters of the model.
                Must define: num_epochs, train_size, batch_size, eval_size, save_summary_steps
        restore_from: (string) directory or file containing weights to restore the graph
    """
    # Initialize tf.Saver instances to save weights during training
    last_saver = tf.train.Saver()  # keeps the last 5 checkpoints by default
    best_saver = tf.train.Saver(max_to_keep=1)  # only keep 1 best checkpoint (best on eval)
    begin_at_epoch = 0

    with tf.Session() as sess:
        # Initialize model variables
        sess.run(train_model_spec['variable_init_op'])

        # Reload weights from directory if specified
        if restore_from is not None:
            logging.info("Restoring parameters from {}".format(restore_from))
            if os.path.isdir(restore_from):
                restore_from = tf.train.latest_checkpoint(restore_from)
            # Checkpoints are named 'after-epoch-<N>', so recover the epoch count
            begin_at_epoch = int(restore_from.split('-')[-1])
            last_saver.restore(sess, restore_from)

        # For tensorboard (takes care of writing summaries to files)
        train_writer = tf.summary.FileWriter(
            os.path.join(model_dir, 'train_summaries'), sess.graph)
        eval_writer = tf.summary.FileWriter(
            os.path.join(model_dir, 'eval_summaries'), sess.graph)

        best_eval_acc = 0.0
        for epoch in range(begin_at_epoch, begin_at_epoch + params.num_epochs):
            # Run one epoch
            logging.info("Epoch {}/{}".format(epoch + 1,
                                              begin_at_epoch + params.num_epochs))
            # Compute number of batches in one epoch (one full pass over the training set)
            num_steps = (params.train_size + params.batch_size - 1) // params.batch_size
            train_sess(sess, train_model_spec, num_steps, train_writer, params)

            # Save weights
            last_save_path = os.path.join(model_dir, 'last_weights', 'after-epoch')
            last_saver.save(sess, last_save_path, global_step=epoch + 1)

            # Evaluate for one epoch on validation set
            num_steps = (params.eval_size + params.batch_size - 1) // params.batch_size
            metrics = evaluate_sess(sess, eval_model_spec, num_steps, eval_writer)

            # If this is the best accuracy so far, save the checkpoint and metrics
            eval_acc = metrics['accuracy']
            if eval_acc >= best_eval_acc:
                # Store new best accuracy
                best_eval_acc = eval_acc
                # Save weights
                best_save_path = os.path.join(model_dir, 'best_weights', 'after-epoch')
                best_save_path = best_saver.save(sess, best_save_path,
                                                 global_step=epoch + 1)
                logging.info("- Found new best accuracy, saving in {}".format(best_save_path))
                # Save best eval metrics in a json file in the model directory
                best_json_path = os.path.join(model_dir, "metrics_eval_best_weights.json")
                save_dict_to_json(metrics, best_json_path)

            # Save latest eval metrics in a json file in the model directory
            last_json_path = os.path.join(model_dir, "metrics_eval_last_weights.json")
            save_dict_to_json(metrics, last_json_path)

    # Reset the graph after training and evaluation - used in hyperparameter search
    tf.reset_default_graph()
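# A minimal usage sketch for `train_and_evaluate` (illustrative only). The
# `model_fn`/`input_fn` signatures are assumptions about the surrounding
# template; note that the eval spec is built with `reuse=True` so both graphs
# share the same weights:
#
#     train_inputs = input_fn(is_training=True, filenames=train_filenames,
#                             labels=train_labels, params=params)
#     eval_inputs = input_fn(is_training=False, filenames=dev_filenames,
#                            labels=dev_labels, params=params)
#     train_model_spec = model_fn('train', train_inputs, params)
#     eval_model_spec = model_fn('eval', eval_inputs, params, reuse=True)
#     train_and_evaluate(train_model_spec, eval_model_spec,
#                        'experiments/base_model', params, restore_from=None)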