def run(checkpoint, batch_size, dataset_name, images_path):
    """Run text recognition on every entry of a directory, one image per step.

    Args:
        checkpoint: Path of the checkpoint file the model is restored from.
        batch_size: Batch size forwarded to `create_model` and `load_images`.
        dataset_name: Dataset name forwarded to `create_model` and
            `load_images`.
        images_path: Directory whose entries are processed as images, in
            sorted file name order.

    Returns:
        A list with every decoded prediction, in processing order.
    """
    # Create model
    images_placeholder, endpoints = create_model(batch_size, dataset_name)

    # Load pre-trained model
    session_creator = monitored_session.ChiefSessionCreator(
        checkpoint_filename_with_path=checkpoint)

    # Find images
    img_names = sorted(os.listdir(images_path))
    print("\nNumber of images to process : ", len(img_names))
    # os.path.join adds the separator when `images_path` has no trailing
    # slash; plain string concatenation produced broken paths in that case.
    img_paths = [os.path.join(images_path, img_name) for img_name in img_names]
    print("Number of images paths : ", len(img_paths))

    global_results = []
    with monitored_session.MonitoredSession(
            session_creator=session_creator) as sess:
        # Loop per batch of size 1
        for i, img_path in enumerate(img_paths):
            print("\nNew Image :", img_path)
            images_data = load_images([img_path], batch_size, dataset_name)
            predictions = sess.run(
                endpoints.predicted_text,
                feed_dict={images_placeholder: images_data})
            result = [
                pr_bytes.decode('utf-8') for pr_bytes in predictions.tolist()
            ]
            for line in result:
                # Print the individual line, not the whole list each time.
                print(line)
                global_results.append(line)
            print("Image :", i)
    return global_results
def __init__(self,
             estimator,
             serving_input_receiver_fn,
             output_key=None,
             graph=None,
             config=None):
    """Initialize a `CoreEstimatorPredictor`.

    Args:
      estimator: an instance of `learn.python.estimator.Estimator`.
      serving_input_receiver_fn: a function that takes no arguments and
        returns an instance of `ServingInputReceiver` compatible with
        `estimator`.
      output_key: Optional string specifying the export output to use. If
        `None`, then `DEFAULT_SERVING_SIGNATURE_DEF_KEY` is used.
      graph: Optional. The Tensorflow `graph` in which prediction should be
        done. A new graph is created when omitted.
      config: `ConfigProto` proto used to configure the session.
    """
    self._graph = graph or ops.Graph()
    with self._graph.as_default():
        receiver = serving_input_receiver_fn()
        signature = _get_signature_def(receiver, estimator, output_key)
        # Variables are restored from the estimator's model directory.
        creator = monitored_session.ChiefSessionCreator(
            config=config, checkpoint_dir=estimator.model_dir)
        self._session = monitored_session.MonitoredSession(
            session_creator=creator)

    # Resolve the signature's tensor names into tensors of the graph.
    lookup = self._graph.get_tensor_by_name
    self._feed_tensors = {
        key: lookup(info.name) for key, info in signature.inputs.items()
    }
    self._fetch_tensors = {
        key: lookup(info.name) for key, info in signature.outputs.items()
    }
def _decode_infer_model(input_fn,
                        feed_fn=None,
                        model_fn=None,
                        runCfg=None,
                        model_dir=None,
                        outputs=None,
                        as_iterable=True,
                        iterate_batches=False):
    """Build the inference graph and return a prediction generator.

    Args:
        input_fn: Callable without arguments that returns the features.
        feed_fn: Forwarded to `_decode_predict_generator`.
        model_fn: Callable invoked as `model_fn(features, None, mode=INFER)`;
            the first element of its 3-tuple result is the prediction output.
        runCfg: Run configuration; `tf_random_seed` and `_session_config` are
            read from it.
        model_dir: Directory searched for the latest checkpoint.
        outputs: Not used by this function.
        as_iterable: Not used by this function.
        iterate_batches: Forwarded to `_decode_predict_generator`.

    Returns:
        The result of `_decode_predict_generator`.

    Raises:
        NotFittedError: If no checkpoint is found in `model_dir`.
    """
    # Check that model has been trained.
    checkpoint_path = saver.latest_checkpoint(model_dir)
    if not checkpoint_path:
        raise NotFittedError("Couldn't find trained model at %s." % model_dir)

    with tf.Graph().as_default() as graph:
        random_seed.set_random_seed(runCfg.tf_random_seed)
        contrib_framework.create_global_step(graph)
        features = input_fn()
        model_result, _, _ = model_fn(
            features, None, mode=tf.contrib.learn.ModeKeys.INFER)
        creator = monitored_session.ChiefSessionCreator(
            checkpoint_filename_with_path=checkpoint_path,
            scaffold=None,
            config=runCfg._session_config)
        mon_sess = monitored_session.MonitoredSession(session_creator=creator)
        return _decode_predict_generator(mon_sess, model_result, feed_fn,
                                         iterate_batches)
def get_monitored_session(checkpoint_path):
    """Return a `MonitoredSession` that restores from `checkpoint_path`.

    Only the checkpoint file is configured; scaffold, master and session
    config are left at the `ChiefSessionCreator` defaults.
    """
    return monitored_session.MonitoredSession(
        session_creator=monitored_session.ChiefSessionCreator(
            checkpoint_filename_with_path=checkpoint_path))
def run(checkpoint, batch_size, dataset_name, image_path_pattern):
    """Predict the text of one batch of images and return it as strings.

    Args:
        checkpoint: Path of the checkpoint file the model is restored from.
        batch_size: Forwarded to `create_model` and `load_images`.
        dataset_name: Forwarded to `create_model` and `load_images`.
        image_path_pattern: Forwarded to `load_images`.

    Returns:
        A list with the UTF-8 decoded predictions.
    """
    images_placeholder, endpoints = create_model(batch_size, dataset_name)
    images_data = load_images(image_path_pattern, batch_size, dataset_name)
    creator = monitored_session.ChiefSessionCreator(
        checkpoint_filename_with_path=checkpoint)

    with monitored_session.MonitoredSession(session_creator=creator) as sess:
        raw_predictions = sess.run(
            endpoints.predicted_text,
            feed_dict={images_placeholder: images_data})

    decoded = []
    for pr_bytes in raw_predictions.tolist():
        decoded.append(pr_bytes.decode('utf-8'))
    return decoded
def outliers_detection(checkpoint_dir):
    """Find outliers using Euclidean distance in the last dense layer.

    A first pass over the batches computes a running mean of
    `model.concat_features`. A second pass keeps, for every position inside
    the batch, the sample with the largest distance to that mean. The three
    result arrays are also saved under `data/`.

    Parameters:
        checkpoint_dir: Checkpoint of the saved model during training.

    Returns:
        Tuple `(max_norms, max_post_ids, max_logits)`.
    """
    with tf.Graph().as_default():
        config = _CONFIG.copy()
        config['mode'] = 'validation'
        model = DeepSentiment(config)

        # Load model
        checkpoint_path = tf_saver.latest_checkpoint(checkpoint_dir)
        scaffold = monitored_session.Scaffold(init_op=None,
                                              init_feed_dict=None,
                                              init_fn=None,
                                              saver=None)
        session_creator = monitored_session.ChiefSessionCreator(
            scaffold=scaffold,
            checkpoint_filename_with_path=checkpoint_path,
            master='',
            config=None)

        im_features_size = config['im_features_size']
        rnn_size = config['rnn_size']
        dense_mean = np.zeros((im_features_size + rnn_size))

        with monitored_session.MonitoredSession(  # Generate queue
                session_creator=session_creator, hooks=None) as session:
            batch_size = config['batch_size']
            # Floor division keeps the count an int: `/` yields a float on
            # Python 3 and `range()` would raise TypeError. The trailing
            # partial batch is ignored, as with Python 2 integer division.
            nb_batches = model.dataset.num_samples // batch_size

            for i in range(nb_batches):
                current_dense = session.run(model.concat_features)
                # Running mean: the previous mean is weighted by i / (i + 1).
                weight = float(i) * batch_size / ((i + 1) * batch_size)
                dense_mean = weight * dense_mean + (
                    1 - weight) * current_dense.mean(axis=0)

            # Now look at outliers
            max_norms = np.zeros((batch_size))
            max_post_ids = np.zeros((batch_size))
            max_logits = np.zeros((batch_size, model.dataset.num_classes))
            for i in range(nb_batches):
                current_dense, np_post_ids, current_logits = session.run(
                    [model.concat_features, model.post_ids, model.logits])
                current_diff = np.linalg.norm(current_dense - dense_mean,
                                              axis=1)
                for k in range(batch_size):
                    if current_diff[k] > max_norms[k]:
                        max_norms[k] = current_diff[k]
                        max_post_ids[k] = np_post_ids[k]
                        max_logits[k] = current_logits[k]

        np.save('data/max_norms.npy', max_norms)
        np.save('data/max_post_ids.npy', max_post_ids)
        np.save('data/max_logits.npy', max_logits)
        return max_norms, max_post_ids, max_logits
def run(checkpoint, batch_size, dataset_name, images_data, labels):
    """Evaluate all model endpoints for one batch of images and labels.

    Args:
        checkpoint: Path of the checkpoint file the model is restored from.
        batch_size: Forwarded to `create_model`.
        dataset_name: Forwarded to `create_model`.
        images_data: Value fed to the images placeholder.
        labels: Value fed to the labels placeholder.

    Returns:
        The value of `endpoints` as returned by `sess.run`.
    """
    images_placeholder, labels_placeholder, endpoints = create_model(
        batch_size, dataset_name)
    creator = monitored_session.ChiefSessionCreator(
        checkpoint_filename_with_path=checkpoint)
    feeds = {
        images_placeholder: images_data,
        labels_placeholder: labels,
    }
    with monitored_session.MonitoredSession(session_creator=creator) as sess:
        return sess.run(endpoints, feed_dict=feeds)
def session_creator(self,
                    scaffold=None,
                    config=None,
                    checkpoint_dir=None,
                    checkpoint_filename_with_path=None,
                    max_wait_secs=7200):
    """Returns a session creator.

    The returned session creator is configured with this object's master
    target and session config. A chief creator is returned when there is no
    strategy or the strategy reports `experimental_should_init`; otherwise a
    worker creator is returned.

    Args:
      scaffold: A `Scaffold` used for gathering or building supportive ops.
        If not specified a default one is created. It's used to finalize the
        graph.
      config: `ConfigProto` proto used to configure the session. When given,
        a deep copy of it is merged with this object's session config.
      checkpoint_dir: A string. Optional path to a directory where to restore
        variables.
      checkpoint_filename_with_path: Full file name path to the checkpoint
        file. Only one of `checkpoint_dir` or
        `checkpoint_filename_with_path` can be specified.
      max_wait_secs: Maximum time to wait for the session to become
        available. Only used by the worker session creator.

    Returns:
      a descendant of SessionCreator.
    """
    session_config = self._session_config
    if config:
        # Copy first so the caller's proto is not modified by the merge.
        session_config = copy.deepcopy(config)
        session_config.MergeFrom(self._session_config)

    should_init = (not self._strategy or
                   self._strategy.extended.experimental_should_init)
    if not should_init:
        logging.info("Creating worker session creator with config: %r", config)
        return monitored_session.WorkerSessionCreator(
            scaffold,
            master=self.master_target,
            config=session_config,
            max_wait_secs=max_wait_secs)

    logging.info("Creating chief session creator with config: %r", config)
    return monitored_session.ChiefSessionCreator(
        scaffold,
        master=self.master_target,
        config=session_config,
        checkpoint_dir=checkpoint_dir,
        checkpoint_filename_with_path=checkpoint_filename_with_path)
def run(checkpoint, batch_size, dataset_name, image_path_pattern):
    """Predict the text of one batch of images and report the elapsed time.

    The printed time covers session creation (including the checkpoint
    restore) and the single `sess.run` call.

    Args:
        checkpoint: Path of the checkpoint file the model is restored from.
        batch_size: Forwarded to `create_model` and `load_images`.
        dataset_name: Forwarded to `create_model` and `load_images`.
        image_path_pattern: Forwarded to `load_images`.

    Returns:
        Tuple `(predictions, paths)`: the raw predictions as a list and the
        paths returned by `load_images`.
    """
    import time

    images_placeholder, endpoints = create_model(batch_size, dataset_name)
    images_data, paths = load_images(image_path_pattern, batch_size,
                                     dataset_name)
    creator = monitored_session.ChiefSessionCreator(
        checkpoint_filename_with_path=checkpoint)

    started_at = time.time()
    with monitored_session.MonitoredSession(session_creator=creator) as sess:
        predictions = sess.run(
            endpoints.predicted_text,
            feed_dict={images_placeholder: images_data})
        print('Running time: ', time.time() - started_at)
    return predictions.tolist(), paths
def _predict(self, run_ctx, step):
    """Run a full prediction pass using the current training variables.

    The values of the training variables are fetched from the training
    session and fed into `self._var_feed_op` when the prediction session is
    initialized. Every prediction is handed to `self._handler`.

    Args:
        run_ctx: Run context whose `session` is the training session.
        step: Step value passed to the handler and to the timer.
    """
    # Snapshot the training variables from the running training session.
    var_name_to_value = run_ctx.session.run(self._var_name_to_train_var)
    logging.info('Building placeholders.')
    placeholder_to_value = {
        self._var_name_to_placeholder[v_name]: var_name_to_value[v_name]
        for v_name in var_name_to_value
    }

    def feed_variables(scaffold, session):
        # Scaffold init_fn: loads the snapshot through the feed op, so no
        # checkpoint is needed.
        del scaffold
        session.run(self._var_feed_op, feed_dict=placeholder_to_value)

    logging.info('Building scaffold.')
    scaffold = training.Scaffold(init_fn=feed_variables)

    with self._graph.as_default():
        # The session targets the same master as the training session.
        session_creator = monitored_session.ChiefSessionCreator(
            scaffold=scaffold,
            checkpoint_filename_with_path=None,
            master=run_ctx.session.sess_str)

        self._handler.setup(step)
        logging.info('Setup done.')
        with monitored_session.MonitoredSession(
                session_creator=session_creator,
                hooks=self._all_hooks) as predict_session:
            # Runs until the session requests a stop.
            while not predict_session.should_stop():
                logging.info('Predicting.... %s', self._predictions)
                preds_evaluated = predict_session.run(self._predictions)
                if not isinstance(self._predictions, dict):
                    for pred in preds_evaluated:
                        self._handler.handle_prediction(pred)
                else:
                    # Dict predictions are split into one dict per example.
                    for i in range(
                            self._estimator._extract_batch_length(
                                preds_evaluated)):
                        self._handler.handle_prediction({
                            key: value[i]
                            for key, value in six.iteritems(
                                preds_evaluated)
                        })

        logging.info('Finalizing.')
        self._handler.finalize(step)

        logging.info('Done with prediction.')
    self._timer.update_last_triggered_step(step)
def __init__(self,
             estimator,
             prediction_input_fn,
             input_alternative_key=None,
             output_alternative_key=None,
             graph=None,
             config=None):
    """Initialize a `ContribEstimatorPredictor`.

    Args:
      estimator: an instance of `tf.contrib.learn.Estimator`.
      prediction_input_fn: a function that takes no arguments and returns an
        instance of `InputFnOps`.
      input_alternative_key: Optional. Specify the input alternative used for
        prediction. Falls back to `DEFAULT_INPUT_ALTERNATIVE_KEY`.
      output_alternative_key: Specify the output alternative used for
        prediction. Not needed for single-headed models but required for
        multi-headed models.
      graph: Optional. The Tensorflow `graph` in which prediction should be
        done. A new graph is created when omitted.
      config: `ConfigProto` proto used to configure the session.
    """
    self._graph = graph or ops.Graph()
    with self._graph.as_default():
        input_fn_ops = prediction_input_fn()
        # pylint: disable=protected-access
        model_fn_ops = estimator._get_predict_ops(input_fn_ops.features)
        # pylint: enable=protected-access
        latest = checkpoint_management.latest_checkpoint(estimator.model_dir)
        creator = monitored_session.ChiefSessionCreator(
            config=config, checkpoint_filename_with_path=latest)
        self._session = monitored_session.MonitoredSession(
            session_creator=creator)

    if not input_alternative_key:
        input_alternative_key = (
            saved_model_export_utils.DEFAULT_INPUT_ALTERNATIVE_KEY)
    input_alternatives, _ = saved_model_export_utils.get_input_alternatives(
        input_fn_ops)
    self._feed_tensors = input_alternatives[input_alternative_key]

    output_alternatives, output_alternative_key = (
        saved_model_export_utils.get_output_alternatives(
            model_fn_ops, output_alternative_key))
    # Each output alternative is a pair; only its tensors are kept.
    self._fetch_tensors = output_alternatives[output_alternative_key][1]
def day_of_week_trend(checkpoint_dir):
    """Compute day of week trend.

    Runs the validation model over all full batches, collects logits,
    labels, days and post ids, and saves each of them under `data/`.

    Parameters:
        checkpoint_dir: Checkpoint of the saved model during training.

    Returns:
        Tuple `(posts_logits, posts_labels, posts_days, posts_ids)` of
        stacked numpy arrays.
    """
    with tf.Graph().as_default():
        config = _CONFIG.copy()
        config['mode'] = 'validation'
        model = DeepSentiment(config)

        # Load model
        checkpoint_path = tf_saver.latest_checkpoint(checkpoint_dir)
        scaffold = monitored_session.Scaffold(init_op=None,
                                              init_feed_dict=None,
                                              init_fn=None,
                                              saver=None)
        session_creator = monitored_session.ChiefSessionCreator(
            scaffold=scaffold,
            checkpoint_filename_with_path=checkpoint_path,
            master='',
            config=None)

        posts_logits = []
        posts_labels = []
        posts_days = []
        posts_ids = []
        with monitored_session.MonitoredSession(  # Generate queue
                session_creator=session_creator, hooks=None) as session:
            batch_size = config['batch_size']
            # Floor division keeps the count an int: `/` yields a float on
            # Python 3 and `range()` would raise TypeError. The trailing
            # partial batch is ignored, as with Python 2 integer division.
            nb_batches = model.dataset.num_samples // batch_size
            for i in range(nb_batches):
                np_logits, np_labels, np_days, np_post_ids = session.run(
                    [model.logits, model.labels, model.days, model.post_ids])
                posts_logits.append(np_logits)
                posts_labels.append(np_labels)
                posts_days.append(np_days)
                posts_ids.append(np_post_ids)

        # Logits are stacked row-wise; the other values are concatenated
        # into flat arrays.
        posts_logits, posts_labels = np.vstack(posts_logits), np.hstack(
            posts_labels)
        posts_days, posts_ids = np.hstack(posts_days), np.hstack(posts_ids)
        np.save('data/posts_logits_week.npy', posts_logits)
        np.save('data/posts_labels_week.npy', posts_labels)
        np.save('data/posts_days_week.npy', posts_days)
        np.save('data/posts_ids_week.npy', posts_ids)
        return posts_logits, posts_labels, posts_days, posts_ids
def main():
    """Recognize the text of the image given as first command line argument.

    The model is restored from a fixed checkpoint path, the image is fed as
    a batch of one, and the decoded prediction plus the elapsed inference
    time are printed.
    """
    # NOTE(review): the shape is currently unused because the resize step is
    # disabled; the input presumably has to match the model size already.
    height, width, channel = get_dataset_image_shape(FLAGS.dataset_name)
    images_placeholder, endpoints = create_model(FLAGS.dataset_name)
    session_creator = monitored_session.ChiefSessionCreator(
        checkpoint_filename_with_path=
        '/media/dont/data/dont/datasets/crnn/tfname/logs/model.ckpt-110665')

    image = PIL.Image.open(sys.argv[1]).convert('RGB')
    images_data = np.expand_dims(np.asarray(image), axis=0)

    # The context manager closes the session even when inference fails; the
    # session was previously never closed.
    with monitored_session.MonitoredSession(
            session_creator=session_creator) as sess:
        start = timeit.default_timer()
        predictions = sess.run(endpoints.predicted_text,
                               feed_dict={images_placeholder: images_data})
        print(predictions[0].decode('utf8'))
        stop = timeit.default_timer()
    print('Time : ', stop - start)
def testMonitoredSession(self):
    """Adds two placeholders on the IPU device inside a MonitoredSession."""
    random_seed.set_random_seed(1)

    with ops.device("/device:IPU:0"):
        pa = array_ops.placeholder(np.float32, [2, 2], name="a")
        pb = array_ops.placeholder(np.float32, [2, 2], name="b")
        output = pa + pb

    # (value of a, value of b, expected a + b)
    cases = [
        ([[1., 1.], [2., 3.]], [[0., 1.], [4., 5.]], [[1., 2.], [6., 8.]]),
        ([[0., 0.], [1., 1.]], [[2., 1.], [4., 5.]], [[2., 1.], [5., 6.]]),
    ]

    with ms.MonitoredSession(
            session_creator=ms.ChiefSessionCreator()) as sess:
        for a_value, b_value, expected in cases:
            result = sess.run(output, {pa: a_value, pb: b_value})
            self.assertAllClose(result, expected)
def run(checkpoint, batch_size, dataset_name, image_path_pattern, annotations):
    """Recognize the text inside annotated boxes and save annotated images.

    Every box of every image is cropped, resized to the dataset image size
    and run through the model. The box and the cleaned prediction are drawn
    on a copy of the image, which is written to `/mnt/output/` under the
    image's base name.

    Args:
        checkpoint: Path of the checkpoint file the model is restored from.
        batch_size: Forwarded to `create_model`.
        dataset_name: Forwarded to `create_model` and
            `get_dataset_image_size`.
        image_path_pattern: Not used by this function.
        annotations: Mapping from image path to an iterable of boxes. Each
            box is indexed with the keys 'xmin', 'ymin', 'xmax' and 'ymax';
            the max coordinates are included in the crop.
    """
    images_placeholder, endpoints = create_model(batch_size, dataset_name)
    session_creator = monitored_session.ChiefSessionCreator(
        checkpoint_filename_with_path=checkpoint)
    width, height = get_dataset_image_size(dataset_name)

    with monitored_session.MonitoredSession(
            session_creator=session_creator) as sess:
        for path, boxes in annotations.items():
            print("Processing: ", path)
            file_name = os.path.basename(path)
            img = cv2.imread(
                os.path.join('/mnt/data/datasets/images', file_name))
            if img is None:
                # cv2.imread reports a missing or unreadable file by
                # returning None; skip it instead of failing on the slice.
                print("Skipping unreadable image: ", path)
                continue

            # Draw on a copy so rectangles and text from earlier boxes never
            # end up inside the crops that are fed to the model.
            annotated = img.copy()
            for box in boxes:
                img_cropped = img[box['ymin']:box['ymax'] + 1,
                                  box['xmin']:box['xmax'] + 1]
                pil_img = PIL.Image.fromarray(img_cropped)
                # LANCZOS is the same filter as the ANTIALIAS alias, which
                # was removed in Pillow 10.
                pil_img_cropped = pil_img.resize((width, height),
                                                 PIL.Image.LANCZOS)
                predictions = sess.run(
                    endpoints.predicted_text,
                    feed_dict={
                        images_placeholder:
                            np.asarray(pil_img_cropped)[np.newaxis, ...]
                    })
                output = [
                    pr_bytes.decode('utf-8')
                    for pr_bytes in predictions.tolist()
                ][0]
                # Keep only word characters and whitespace.
                output = re.sub(r'([^\s\w]|_)+', '', output)
                cv2.rectangle(annotated, (box['xmin'], box['ymin']),
                              (box['xmax'], box['ymax']), (0, 255, 0), 3)
                cv2.putText(annotated, output.replace('?', ''),
                            (box['xmin'], box['ymin'] - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2,
                            cv2.LINE_AA)
            cv2.imwrite('/mnt/output/' + file_name, annotated)
def test_moving_variables_properly_loaded_from_a_checkpoint(self):
    """A batch-norm moving mean must equal the value stored in the checkpoint."""
    batch_size = 32
    dataset_name = 'fsns'
    tensor_name = 'AttentionOcr_v1/conv_tower_fn/INCE/InceptionV3/Conv2d_2a_3x3/BatchNorm/moving_mean'

    images_placeholder, _ = demo_inference.create_model(batch_size,
                                                        dataset_name)
    images_data = demo_inference.load_images('testdata/fsns_train_%02d.png',
                                             batch_size, dataset_name)
    moving_mean_tf = tf.get_default_graph().get_tensor_by_name(
        tensor_name + ':0')

    # Expected value, read straight from the checkpoint file.
    moving_mean_expected = tf.train.NewCheckpointReader(
        _CHECKPOINT).get_tensor(tensor_name)

    creator = monitored_session.ChiefSessionCreator(
        checkpoint_filename_with_path=_CHECKPOINT)
    with monitored_session.MonitoredSession(session_creator=creator) as sess:
        moving_mean_np = sess.run(
            moving_mean_tf, feed_dict={images_placeholder: images_data})

    self.assertAllEqual(moving_mean_expected, moving_mean_np)
def correlation_matrix(nb_batches, checkpoint_dir):
    """Compute logits and labels of the input posts and save them to disk.

    Parameters:
        nb_batches: Number of batches to run through the model.
        checkpoint_dir: Checkpoint of the saved model during training.

    Returns:
        Tuple `(posts_logits, posts_labels)` of stacked numpy arrays, which
        are also saved as `data/posts_logits.npy` and
        `data/posts_labels.npy`.
    """
    with tf.Graph().as_default():
        config = _CONFIG.copy()
        config['mode'] = 'validation'
        model = DeepSentiment(config)

        # Load model
        checkpoint_path = tf_saver.latest_checkpoint(checkpoint_dir)
        scaffold = monitored_session.Scaffold(
            init_op=None, init_feed_dict=None, init_fn=None, saver=None)
        creator = monitored_session.ChiefSessionCreator(
            scaffold=scaffold,
            checkpoint_filename_with_path=checkpoint_path,
            master='',
            config=None)

        logits_per_batch = []
        labels_per_batch = []
        fetches = [model.logits, model.labels]
        with monitored_session.MonitoredSession(  # Generate queue
                session_creator=creator, hooks=None) as session:
            for _ in range(nb_batches):
                np_logits, np_labels = session.run(fetches)
                logits_per_batch.append(np_logits)
                labels_per_batch.append(np_labels)

        posts_logits = np.vstack(logits_per_batch)
        posts_labels = np.hstack(labels_per_batch)
        np.save('data/posts_logits.npy', posts_logits)
        np.save('data/posts_labels.npy', posts_labels)
        return posts_logits, posts_labels
def testMonitoredSessionStopAtStepHook(self):
    """Same addition check, with a global step and a `StopAtStepHook`."""
    random_seed.set_random_seed(1)

    with ops.device("/device:IPU:0"):
        pa = array_ops.placeholder(np.float32, [2, 2], name="a")
        pb = array_ops.placeholder(np.float32, [2, 2], name="b")
        output = pa + pb

    with variable_scope.variable_scope('gs', use_resource=True):
        training_util.create_global_step()

    stop_hook = basic_session_run_hooks.StopAtStepHook(num_steps=2)

    # (value of a, value of b, expected a + b)
    cases = [
        ([[1., 1.], [2., 3.]], [[0., 1.], [4., 5.]], [[1., 2.], [6., 8.]]),
        ([[0., 0.], [1., 1.]], [[2., 1.], [4., 5.]], [[2., 1.], [5., 6.]]),
    ]

    with ms.MonitoredSession(session_creator=ms.ChiefSessionCreator(),
                             hooks=[stop_hook]) as sess:
        for a_value, b_value, expected in cases:
            result = sess.run(output, {pa: a_value, pb: b_value})
            self.assertAllClose(result, expected)
def train(args):
    """Train CIFAR-10 for a number of steps.

    Checkpoints and summaries are written to `args.train_dir`; training
    stops once the global step reaches `args.train_max_steps`.

    Args:
      args: The command line arguments.
    """
    with tf.Graph().as_default():
        # Create the global step
        global_step = tf.contrib.framework.create_global_step()

        # Input pipeline, model, loss and training op.
        images, labels = cifar10.distorted_inputs(
            args.data_dir, args.batch_size, args.use_fp16)
        logits = cifar10.inference(images, args.batch_size, args.use_fp16)
        loss = cifar10.loss(logits, labels)
        train_op = cifar10.train(loss, global_step, args.batch_size)

        scaffold = monitored_session.Scaffold()
        session_config = tf.ConfigProto(
            log_device_placement=args.log_device_placement)
        session_creator = monitored_session.ChiefSessionCreator(
            scaffold, checkpoint_dir=args.train_dir, config=session_config)

        # Save the model every N steps and at the end.
        checkpoint_hook = basic_session_run_hooks.CheckpointSaverHook(
            args.train_dir,
            checkpoint_basename=CHECKPOINT_BASENAME,
            save_steps=args.checkpoint_interval_steps,
            scaffold=scaffold)
        # Save a summary every N steps.
        summary_hook = basic_session_run_hooks.SummarySaverHook(
            save_steps=args.summary_interval_steps,
            output_dir=args.train_dir,
            scaffold=scaffold)
        # Stop at step N.
        stop_hook = basic_session_run_hooks.StopAtStepHook(
            last_step=args.train_max_steps)
        hooks = [checkpoint_hook, summary_hook, stop_hook]

        fetches = [train_op, loss, global_step]
        # Start a new monitored session. This will automatically restart the
        # sessions if the parameter servers are preempted.
        with monitored_session.MonitoredSession(
                session_creator=session_creator, hooks=hooks) as sess:
            while not sess.should_stop():
                start_time = time.time()
                _, loss_value, global_step_value = sess.run(fetches)
                duration = time.time() - start_time

                assert not np.isnan(
                    loss_value), 'Model diverged with loss = NaN'

                # Only every tenth step is logged.
                if global_step_value % 10 != 0:
                    continue
                examples_per_sec = args.batch_size / duration
                logging.info(
                    ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                     'sec/batch)'), datetime.now(), global_step_value,
                    loss_value, examples_per_sec, float(duration))
def _evaluate_once(checkpoint_path,
                   master='',
                   scaffold=None,
                   eval_ops=None,
                   feed_dict=None,
                   final_ops=None,
                   final_ops_feed_dict=None,
                   hooks=None,
                   config=None):
    """Evaluates the model at the given checkpoint path.

    During a single evaluation, `eval_ops` is run until the session is
    interrupted or requested to finish. This is typically requested via a
    `tf.contrib.training.StopAfterNEvalsHook`, which results in `eval_ops`
    running the requested number of times.

    Optionally, `final_ops` (a single `Tensor`, a list of `Tensors` or a
    dictionary from names to `Tensors`) is evaluated a single time after
    `eval_ops` has finished running, and its fetched values are returned. If
    `final_ops` is left as `None`, then `None` is returned.

    One may also consider using a `tf.contrib.training.SummaryAtEndHook` to
    record summaries after the `eval_ops` have run. If `eval_ops` is `None`,
    the summaries run immediately after the model checkpoint has been
    restored.

    Note that `evaluate_once` creates a local variable used to track the
    number of evaluations run via
    `tf.contrib.training.get_or_create_eval_step`. Consequently, if a custom
    local init op is provided via a `scaffold`, the caller should ensure that
    the local init op also initializes the eval step.

    An op that increments the eval step is run together with `eval_ops`. When
    `eval_ops` is a dict, that op is stored in it under the key
    `'update_eval_step'`.

    Args:
      checkpoint_path: The path to a checkpoint to use for evaluation.
      master: The BNS address of the TensorFlow master.
      scaffold: An tf.train.Scaffold instance for initializing variables and
        restoring variables. Note that `scaffold.init_fn` is used by the
        function to restore the checkpoint. If you supply a custom init_fn,
        then it must also take care of restoring the model from its
        checkpoint.
      eval_ops: A single `Tensor`, a list of `Tensors` or a dictionary of
        names to `Tensors`, which is run until the session is requested to
        stop, commonly done by a `tf.contrib.training.StopAfterNEvalsHook`.
      feed_dict: The feed dictionary to use when executing the `eval_ops`.
      final_ops: A single `Tensor`, a list of `Tensors` or a dictionary of
        names to `Tensors`.
      final_ops_feed_dict: A feed dictionary to use when evaluating
        `final_ops`.
      hooks: List of `tf.train.SessionRunHook` callbacks which are run inside
        the evaluation loop.
      config: An instance of `tf.ConfigProto` that will be used to configure
        the `Session`. If left as `None`, the default will be used.

    Returns:
      The fetched values of `final_ops` or `None` if `final_ops` is `None`.
    """
    eval_step = _get_or_create_eval_step()

    # Work on a private list so the caller's `hooks` is never extended.
    hooks = list(hooks or [])

    if eval_ops is not None:
        uses_multi_step_hook = any(
            isinstance(h, _MultiStepStopAfterNEvalsHook) for h in hooks)
        if uses_multi_step_hook:
            # One session run covers several evaluation steps.
            steps_per_run_variable = \
                basic_session_run_hooks.get_or_create_steps_per_run_variable()
            increment = math_ops.cast(steps_per_run_variable,
                                      dtype=eval_step.dtype)
        else:
            increment = 1
        update_eval_step = state_ops.assign_add(
            eval_step, increment, use_locking=True)

        # Attach the counter update to whatever form `eval_ops` has.
        if isinstance(eval_ops, dict):
            eval_ops['update_eval_step'] = update_eval_step
        elif isinstance(eval_ops, (tuple, list)):
            eval_ops = list(eval_ops) + [update_eval_step]
        else:
            eval_ops = [eval_ops, update_eval_step]

        eval_step_value = _get_latest_eval_step_value(eval_ops)

        stop_hook_types = (_StopAfterNEvalsHook, _MultiStepStopAfterNEvalsHook)
        for hook in hooks:
            if isinstance(hook, stop_hook_types):
                hook._set_evals_completed_tensor(eval_step_value)  # pylint: disable=protected-access

    logging.info('Starting evaluation at ' +
                 time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime()))

    # Prepare the session creator.
    session_creator = monitored_session.ChiefSessionCreator(
        scaffold=scaffold,
        checkpoint_filename_with_path=checkpoint_path,
        master=master,
        config=config)

    # This hook evaluates `final_ops` and holds their values afterwards.
    final_ops_hook = basic_session_run_hooks.FinalOpsHook(
        final_ops, final_ops_feed_dict)
    hooks.append(final_ops_hook)

    with monitored_session.MonitoredSession(
            session_creator=session_creator, hooks=hooks) as session:
        if eval_ops is not None:
            while not session.should_stop():
                session.run(eval_ops, feed_dict)

    logging.info('Finished evaluation at ' +
                 time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))
    return final_ops_hook.final_ops_values
# Build the OCR model from the dataset's properties.
model = common_flags.create_model(
    num_char_classes=dataset.num_char_classes,
    seq_length=dataset.max_sequence_length,
    num_views=dataset.num_of_views,
    null_code=dataset.null_code,
    charset=dataset.charset,
)
# Raw uint8 images are fed here and preprocessed one by one into float32.
raw_images = tf.placeholder(tf.uint8, shape=[batch_size, height, width, 3])
images = tf.map_fn(data_provider.preprocess_image,
                   raw_images,
                   dtype=tf.float32)
endpoints = model.create_base(images, labels_one_hot=None)
# Buffer with the placeholder's shape; np.ndarray leaves it uninitialized.
images_data = np.ndarray(shape=(batch_size, height, width, 3), dtype="uint8")
# Session for the OCR model, restored from `checkpoint`.
session_creator = monitored_session.ChiefSessionCreator(
    checkpoint_filename_with_path=checkpoint)
ocr_sess = monitored_session.MonitoredSession(session_creator=session_creator)
""" MAIN RUN """
with detection_graph.as_default():
    # Separate plain session bound to the detection graph.
    od_sess = tf.Session(graph=detection_graph)
    # for i in list(range(4))[::-1]:
    #     print(i + 1)
    #     time.sleep(1)
    image_tensor2 = detection_graph.get_tensor_by_name("image_tensor:0")
    # Each box represents a part of the image where a particular object was detected.
def PartialRestoreSession(
        master='',  # pylint: disable=invalid-name
        is_chief=True,
        checkpoint_dir=None,
        restore_var_list=None,
        scaffold=None,
        hooks=None,
        chief_only_hooks=None,
        save_checkpoint_secs=600,
        save_summaries_steps=monitored_session.USE_DEFAULT,
        save_summaries_secs=monitored_session.USE_DEFAULT,
        config=None,
        stop_grace_period_secs=120,
        log_step_count_steps=100):
    """Creates a `MonitoredSession` for training.

    Supports partial restoration from checkpoints with parameter
    `restore_var_list`, by adding `CheckpointRestorerHook`.

    For a chief, this utility sets proper session initializer/restorer. It
    also creates hooks related to checkpoint and summary saving. For workers,
    this utility sets proper session creator which waits for the chief to
    initialize/restore. Please check `tf.train.MonitoredSession` for more
    information.

    Args:
      master: `String` the TensorFlow master to use.
      is_chief: If `True`, it will take care of initialization and recovery
        the underlying TensorFlow session. If `False`, it will wait on a
        chief to initialize or recover the TensorFlow session.
      checkpoint_dir: A string. Optional path to a directory where to restore
        variables.
      restore_var_list: a list of variables, optional, if not all variables
        should be recovered from checkpoint. Useful when changing network
        structures during training, i.e., finetuning a pretrained model with
        new layers.
      scaffold: A `Scaffold` used for gathering or building supportive ops.
        If not specified, a default one is created. It's used to finalize
        the graph.
      hooks: Optional list of `SessionRunHook` objects.
      chief_only_hooks: list of `SessionRunHook` objects. Activate these
        hooks if `is_chief==True`, ignore otherwise.
      save_checkpoint_secs: The frequency, in seconds, that a checkpoint is
        saved using a default checkpoint saver. If `save_checkpoint_secs` is
        set to `None`, then the default checkpoint saver isn't used.
      save_summaries_steps: The frequency, in number of global steps, that
        the summaries are written to disk using a default summary saver. If
        both `save_summaries_steps` and `save_summaries_secs` are set to
        `None`, then the default summary saver isn't used. Default 100.
      save_summaries_secs: The frequency, in secs, that the summaries are
        written to disk using a default summary saver. If both
        `save_summaries_steps` and `save_summaries_secs` are set to `None`,
        then the default summary saver isn't used. Default not enabled.
      config: an instance of `tf.ConfigProto` proto used to configure the
        session. It's the `config` argument of constructor of `tf.Session`.
      stop_grace_period_secs: Number of seconds given to threads to stop
        after `close()` has been called.
      log_step_count_steps: The frequency, in number of global steps, that
        the global step/sec is logged.

    Returns:
      A `MonitoredSession` object.
    """
    # Resolve the USE_DEFAULT sentinels of the two summary frequencies.
    if save_summaries_steps == monitored_session.USE_DEFAULT \
            and save_summaries_secs == monitored_session.USE_DEFAULT:
        save_summaries_steps = 100
        save_summaries_secs = None
    elif save_summaries_secs == monitored_session.USE_DEFAULT:
        save_summaries_secs = None
    elif save_summaries_steps == monitored_session.USE_DEFAULT:
        save_summaries_steps = None

    scaffold = scaffold or monitored_session.Scaffold()

    if not is_chief:
        session_creator = monitored_session.WorkerSessionCreator(
            scaffold=scaffold, master=master, config=config)
        return monitored_session.MonitoredSession(
            session_creator=session_creator,
            hooks=hooks or [],
            stop_grace_period_secs=stop_grace_period_secs)

    all_hooks = []
    if chief_only_hooks:
        all_hooks.extend(chief_only_hooks)

    if restore_var_list is None:
        restore_checkpoint_dir = checkpoint_dir
    else:
        # The restorer hook loads only the listed variables, so the session
        # creator must not restore from the directory itself.
        restore_checkpoint_dir = None
        all_hooks.append(
            CheckpointRestorerHook(checkpoint_dir, var_list=restore_var_list))
        all_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
        # Build a real list: `filter()` is a lazy iterator on Python 3 and
        # would be logged as "<filter object ...>" instead of the variables.
        missing_vars = [v for v in all_vars if v not in restore_var_list]
        logging.warning("MonitoredTrainingSession not restoring %s",
                        missing_vars)

    session_creator = monitored_session.ChiefSessionCreator(
        scaffold=scaffold,
        checkpoint_dir=restore_checkpoint_dir,
        master=master,
        config=config)

    if checkpoint_dir:
        all_hooks.append(
            basic_session_run_hooks.StepCounterHook(
                output_dir=checkpoint_dir,
                every_n_steps=log_step_count_steps))

        if (save_summaries_steps and save_summaries_steps > 0) or (
                save_summaries_secs and save_summaries_secs > 0):
            all_hooks.append(
                basic_session_run_hooks.SummarySaverHook(
                    scaffold=scaffold,
                    save_steps=save_summaries_steps,
                    save_secs=save_summaries_secs,
                    output_dir=checkpoint_dir))
        if save_checkpoint_secs and save_checkpoint_secs > 0:
            all_hooks.append(
                basic_session_run_hooks.CheckpointSaverHook(
                    checkpoint_dir,
                    save_secs=save_checkpoint_secs,
                    scaffold=scaffold))

    if hooks:
        all_hooks.extend(hooks)
    return monitored_session.MonitoredSession(
        session_creator=session_creator,
        hooks=all_hooks,
        stop_grace_period_secs=stop_grace_period_secs)
def oasis_evaluation(checkpoint_dir, num_classes):
    """Compute the logits of the OASIS dataset.

    Rebuilds the image + text graph, restores the latest checkpoint found in
    `checkpoint_dir` and scores every row of `data/oasis/OASIS.csv` (the image
    named by the 'Theme' column together with the theme text itself).

    Parameters:
        checkpoint_dir: Directory searched for the latest checkpoint saved
            during training.
        num_classes: Number of classes, i.e. the width of the logits.

    Returns:
        A numpy array stacking the logits of every row. The same array is
        saved to `data/oasis_logits.npy`.
    """
    with tf.Graph().as_default():
        config = _CONFIG.copy()
        mode = 'validation'
        text_dir = config['text_dir']
        emb_dir = config['emb_dir']
        filename = config['filename']
        im_features_size = config['im_features_size']
        rnn_size = config['rnn_size']
        final_endpoint = config['final_endpoint']
        # NOTE(review): `fc_size` is read below but never assigned in this
        # function; presumably a module-level name -- confirm.

        tf.logging.set_verbosity(tf.logging.INFO)

        batch_size = 1
        image_size = inception_v1.default_image_size
        # The placeholder holds ONE un-batched image; the batch axis is added
        # after preprocessing.
        images = tf.placeholder(tf.float32, [image_size, image_size, 3])
        images_prep = inception_preprocessing.preprocess_image(
            images, image_size, image_size, is_training=False)
        images_prep_final = tf.expand_dims(images_prep, 0)

        texts = tf.placeholder(tf.int32, [batch_size, _POST_SIZE])
        seq_lens = tf.placeholder(tf.int32, [batch_size])

        # Create the model, use the default arg scope to configure the batch
        # norm parameters.
        is_training = (mode == 'train')
        with slim.arg_scope(inception_v1.inception_v1_arg_scope()):
            images_features, _ = inception_v1.inception_v1(
                images_prep_final,
                final_endpoint=final_endpoint,
                num_classes=im_features_size,
                is_training=is_training)

        # Text model
        vocabulary, embedding = _load_embedding_weights_glove(
            text_dir, emb_dir, filename)
        vocab_size, embedding_dim = embedding.shape
        word_to_id = dict(zip(vocabulary, range(vocab_size)))
        # One extra id is reserved for unknown words; the embedding variable
        # below is sized accordingly (its values come from the checkpoint).
        word_to_id['<ukn>'] = vocab_size
        vocab_size = len(word_to_id)
        nb_emotions = num_classes

        # NOTE(review): the original formatting was lost; the 'Text' scope is
        # assumed to end before the concatenation -- confirm the variable
        # names match the checkpoint.
        with tf.variable_scope('Text'):
            # Word embedding
            W_embedding = tf.get_variable(
                'W_embedding', [vocab_size, embedding_dim], trainable=False)
            input_embed = tf.nn.embedding_lookup(W_embedding, texts)

            # LSTM
            cell = tf.contrib.rnn.BasicLSTMCell(rnn_size)
            rnn_outputs, _ = tf.nn.dynamic_rnn(
                cell, input_embed, sequence_length=seq_lens, dtype=tf.float32)
            # Pick the output at index `seq_len - 1` of each sequence
            # (seq_lens must be int32 for the stack).
            texts_features = tf.gather_nd(
                rnn_outputs,
                tf.stack(
                    [tf.range(batch_size), tf.cast(seq_lens, tf.int32) - 1],
                    axis=1))

        # Concatenate image and text features
        concat_features = tf.concat([images_features, texts_features], axis=1)

        # Dense layer
        W_fc = tf.get_variable('W_fc', [im_features_size + rnn_size, fc_size])
        b_fc = tf.get_variable('b_fc', [fc_size])
        dense_layer = tf.matmul(concat_features, W_fc) + b_fc
        dense_layer_relu = tf.nn.relu(dense_layer)

        W_softmax = tf.get_variable('W_softmax', [fc_size, nb_emotions])
        b_softmax = tf.get_variable('b_softmax', [nb_emotions])
        logits = tf.matmul(dense_layer_relu, W_softmax) + b_softmax

        # Load model
        checkpoint_path = tf_saver.latest_checkpoint(checkpoint_dir)
        scaffold = monitored_session.Scaffold(
            init_op=None, init_feed_dict=None, init_fn=None, saver=None)
        session_creator = monitored_session.ChiefSessionCreator(
            scaffold=scaffold,
            checkpoint_filename_with_path=checkpoint_path,
            master='',
            config=None)

        # Load oasis dataset
        df_oasis = pd.read_csv('data/oasis/OASIS.csv', encoding='utf-8')

        def load_image(name):
            """Read one OASIS image and resize it to the network input size."""
            im_path = 'data/oasis/images/' + name.strip() + '.jpg'
            one_im = imread(im_path)
            # [:, :, :3] gets rid of the alpha channel
            one_im = imresize(
                one_im, ((image_size, image_size, 3)))[:, :, :3]
            return one_im

        df_oasis['image'] = df_oasis['Theme'].map(load_image)

        # Strip the digits from the theme to get the text fed to the model.
        df_oasis['Theme'] = df_oasis['Theme'].map(
            lambda x: ''.join([i for i in x if not i.isdigit()]).strip())
        # This mapping is rebuilt from a fresh load, so (unlike the one
        # above) it does not contain the '<ukn>' entry.
        vocabulary, embedding = _load_embedding_weights_glove(
            text_dir, emb_dir, filename)
        word_to_id = dict(zip(vocabulary, range(len(vocabulary))))
        df_oasis['text_list'], df_oasis['text_len'] = zip(
            *df_oasis['Theme'].map(lambda x: _paragraph_to_ids(
                x, word_to_id, _POST_SIZE, emotions='')))

        with monitored_session.MonitoredSession(
                session_creator=session_creator, hooks=None) as session:
            # Floor division: `range` requires an int on Python 3.
            nb_iter = df_oasis.shape[0] // batch_size
            scores = []
            for i in range(nb_iter):
                rows = slice(i * batch_size, (i + 1) * batch_size)
                # `images` takes a single image array (batch_size is 1), so
                # feed the i-th array itself rather than a Series slice.
                np_image = df_oasis['image'].iloc[i]
                np_texts = np.vstack(df_oasis['text_list'].iloc[rows])
                np_seq_lens = df_oasis['text_len'].iloc[rows].values
                scores.append(
                    session.run(
                        logits,
                        feed_dict={
                            images: np_image,
                            texts: np_texts,
                            seq_lens: np_seq_lens
                        }))

        scores = np.vstack(scores)
        np.save('data/oasis_logits.npy', scores)
        return scores
def word_most_relevant(top_words, num_classes, checkpoint_dir):
    """Score single-word posts to find the words most relevant to each label.

    Rebuilds the image + text graph, restores the latest checkpoint found in
    `checkpoint_dir` and evaluates the logits of posts made of one word from
    `top_words` (sequence length 1) paired with an all-zero image.

    Words are processed in batches of 50. Trailing words that do not fill a
    complete batch are not scored, so `scores` can have fewer rows than
    `top_words` has entries.

    Parameters:
        top_words: Word ids to score; each is written to the first position
            of one post.
        num_classes: Number of classes, i.e. the width of the logits.
        checkpoint_dir: Directory searched for the latest checkpoint saved
            during training.

    Returns:
        A tuple `(scores, vocabulary, word_to_id)`: the stacked logits (also
        saved to `data/top_words_scores.npy`, with `top_words` saved to
        `data/top_words.npy`), the loaded vocabulary and the word-to-id
        mapping including the '<ukn>' entry.
    """
    with tf.Graph().as_default():
        config = _CONFIG.copy()
        mode = 'validation'
        text_dir = config['text_dir']
        emb_dir = config['emb_dir']
        filename = config['filename']
        im_features_size = config['im_features_size']
        rnn_size = config['rnn_size']
        final_endpoint = config['final_endpoint']
        # NOTE(review): `fc_size` is read below but never assigned in this
        # function; presumably a module-level name -- confirm.

        tf.logging.set_verbosity(tf.logging.INFO)

        batch_size = 50
        image_size = inception_v1.default_image_size
        images = tf.placeholder(tf.float32,
                                [batch_size, image_size, image_size, 3])
        texts = tf.placeholder(tf.int32, [batch_size, _POST_SIZE])
        seq_lens = tf.placeholder(tf.int32, [batch_size])

        # Create the model, use the default arg scope to configure the batch
        # norm parameters.
        is_training = (mode == 'train')
        with slim.arg_scope(inception_v1.inception_v1_arg_scope()):
            images_features, _ = inception_v1.inception_v1(
                images,
                final_endpoint=final_endpoint,
                num_classes=im_features_size,
                is_training=is_training)

        # Text model
        vocabulary, embedding = _load_embedding_weights_glove(
            text_dir, emb_dir, filename)
        vocab_size, embedding_dim = embedding.shape
        word_to_id = dict(zip(vocabulary, range(vocab_size)))
        # One extra id is reserved for unknown words; the embedding variable
        # below is sized accordingly (its values come from the checkpoint).
        word_to_id['<ukn>'] = vocab_size
        vocab_size = len(word_to_id)
        nb_emotions = num_classes

        # NOTE(review): the original formatting was lost; the 'Text' scope is
        # assumed to end before the concatenation -- confirm the variable
        # names match the checkpoint.
        with tf.variable_scope('Text'):
            # Word embedding
            W_embedding = tf.get_variable(
                'W_embedding', [vocab_size, embedding_dim], trainable=False)
            input_embed = tf.nn.embedding_lookup(W_embedding, texts)

            # LSTM
            cell = tf.contrib.rnn.BasicLSTMCell(rnn_size)
            rnn_outputs, _ = tf.nn.dynamic_rnn(
                cell, input_embed, sequence_length=seq_lens, dtype=tf.float32)
            # Pick the output at index `seq_len - 1` of each sequence
            # (seq_lens must be int32 for the stack).
            texts_features = tf.gather_nd(
                rnn_outputs,
                tf.stack(
                    [tf.range(batch_size), tf.cast(seq_lens, tf.int32) - 1],
                    axis=1))

        # Concatenate image and text features
        concat_features = tf.concat([images_features, texts_features], axis=1)

        # Dense layer
        W_fc = tf.get_variable('W_fc', [im_features_size + rnn_size, fc_size])
        b_fc = tf.get_variable('b_fc', [fc_size])
        dense_layer = tf.matmul(concat_features, W_fc) + b_fc
        dense_layer_relu = tf.nn.relu(dense_layer)

        W_softmax = tf.get_variable('W_softmax', [fc_size, nb_emotions])
        b_softmax = tf.get_variable('b_softmax', [nb_emotions])
        logits = tf.matmul(dense_layer_relu, W_softmax) + b_softmax

        # Load model
        checkpoint_path = tf_saver.latest_checkpoint(checkpoint_dir)
        scaffold = monitored_session.Scaffold(
            init_op=None, init_feed_dict=None, init_fn=None, saver=None)
        session_creator = monitored_session.ChiefSessionCreator(
            scaffold=scaffold,
            checkpoint_filename_with_path=checkpoint_path,
            master='',
            config=None)

        with monitored_session.MonitoredSession(
                session_creator=session_creator, hooks=None) as session:
            # Floor division: `range` requires an int on Python 3. Only
            # complete batches are scored because the placeholders have a
            # fixed batch dimension.
            nb_iter = len(top_words) // batch_size
            scores = []
            for i in range(nb_iter):
                np_images = np.zeros((batch_size, image_size, image_size, 3))
                # Fill every position with the last id (vocab_size - 1,
                # presumably the '<ukn>' id), then put the word under test
                # first; with a sequence length of 1 only that word is read.
                np_texts = np.ones(
                    (batch_size, _POST_SIZE), dtype=np.int32) * (vocab_size - 1)
                np_texts[:, 0] = top_words[i * batch_size:(i + 1) * batch_size]
                np_seq_lens = np.ones(batch_size, dtype=np.int32)
                scores.append(
                    session.run(
                        logits,
                        feed_dict={
                            images: np_images,
                            texts: np_texts,
                            seq_lens: np_seq_lens
                        }))

        scores = np.vstack(scores)
        np.save('data/top_words_scores.npy', scores)
        np.save('data/top_words.npy', top_words)
        return scores, vocabulary, word_to_id
def evaluate_once(
        checkpoint_path,
        master='',
        scaffold=None,
        eval_ops=None,
        feed_dict=None,
        final_ops=None,
        final_ops_feed_dict=None,
        variables_to_restore=None,
        hooks=None,
        config=None):
    """Evaluates the model at the given checkpoint path.

    During a single evaluation, the `eval_ops` is run until the session is
    interrupted or requested to finish. This is typically requested via a
    `tf.contrib.training.StopAfterNEvalsHook` which results in `eval_ops`
    running the requested number of times.

    Optionally, a user can pass in `final_ops`, a single `Tensor`, a list of
    `Tensors` or a dictionary from names to `Tensors`. The `final_ops` is
    evaluated a single time after `eval_ops` has finished running and the
    fetched values of `final_ops` are returned. If `final_ops` is left as
    `None`, then `None` is returned.

    One may also consider using a `tf.contrib.training.SummaryAtEndHook` to
    record summaries after the `eval_ops` have run. If `eval_ops` is `None`,
    the summaries run immediately after the model checkpoint has been
    restored.

    Note that `evaluate_once` creates a local variable used to track the
    number of evaluations run via
    `tf.contrib.training.get_or_create_eval_step`. Consequently, if a custom
    local init op is provided via a `scaffold`, the caller should ensure that
    the local init op also initializes the eval step.

    The `hooks` list and an `eval_ops` dictionary passed by the caller are
    copied before being extended, so the caller's objects are left unchanged.

    Args:
        checkpoint_path: The path to a checkpoint to use for evaluation.
        master: The BNS address of the TensorFlow master.
        scaffold: A tf.train.Scaffold instance for initializing variables and
            restoring variables. Note that `scaffold.init_fn` is used by the
            function to restore the checkpoint. If you supply a custom
            init_fn, then it must also take care of restoring the model from
            its checkpoint.
        eval_ops: A single `Tensor`, a list of `Tensors` or a dictionary of
            names to `Tensors`, which is run until the session is requested
            to stop, commonly done by a
            `tf.contrib.training.StopAfterNEvalsHook`.
        feed_dict: The feed dictionary to use when executing the `eval_ops`.
        final_ops: A single `Tensor`, a list of `Tensors` or a dictionary of
            names to `Tensors`.
        final_ops_feed_dict: A feed dictionary to use when evaluating
            `final_ops`.
        variables_to_restore: A list of TensorFlow variables to restore
            during evaluation. If the argument is left as `None` then
            tf.contrib.framework.get_variables_to_restore() is used.
        hooks: List of `tf.train.SessionRunHook` callbacks which are run
            inside the evaluation loop.
        config: An instance of `tf.ConfigProto` that will be used to
            configure the `Session`. If left as `None`, the default will be
            used.

    Returns:
        The fetched values of `final_ops` or `None` if `final_ops` is `None`.
    """
    eval_step = get_or_create_eval_step()

    if eval_ops is not None:
        # Every run of `eval_ops` also bumps the evaluation counter.
        update_eval_step = state_ops.assign_add(eval_step, 1)
        if isinstance(eval_ops, dict):
            # Work on a copy so the caller's dict does not gain a key.
            eval_ops = dict(eval_ops)
            eval_ops['update_eval_step'] = update_eval_step
        elif isinstance(eval_ops, (tuple, list)):
            eval_ops = list(eval_ops) + [update_eval_step]
        else:
            eval_ops = [eval_ops, update_eval_step]

    # Must come before the scaffold check.
    if scaffold and scaffold.saver:
        saver = scaffold.saver
    else:
        saver = tf_saver.Saver(
            variables_to_restore or variables.get_variables_to_restore(),
            write_version=saver_pb2.SaverDef.V2)

    scaffold = scaffold or monitored_session.Scaffold()
    scaffold = _scaffold_with_init(scaffold, saver, checkpoint_path)

    logging.info('Starting evaluation at %s',
                 time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))

    # Prepare the session creator. No checkpoint_dir is given here; the
    # scaffold built above is what carries `checkpoint_path`.
    session_creator = monitored_session.ChiefSessionCreator(
        scaffold=scaffold,
        checkpoint_dir=None,
        master=master,
        config=config)

    # Prepare the run hooks on a copy so the caller's list is not extended.
    hooks = list(hooks) if hooks else []
    final_ops_hook = _FinalOpsHook(final_ops, final_ops_feed_dict)
    hooks.append(final_ops_hook)

    with monitored_session.MonitoredSession(
            session_creator=session_creator, hooks=hooks) as session:
        if eval_ops is not None:
            while not session.should_stop():
                session.run(eval_ops, feed_dict)

    logging.info('Finished evaluation at %s',
                 time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))
    return final_ops_hook.final_ops_values
def _monitored_train(graph,
                     output_dir,
                     train_op,
                     loss_op,
                     global_step_tensor=None,
                     init_op=None,
                     init_feed_dict=None,
                     init_fn=None,
                     log_every_steps=10,
                     supervisor_is_chief=True,
                     supervisor_master='',
                     supervisor_save_model_secs=600,
                     supervisor_save_model_steps=None,
                     keep_checkpoint_max=5,
                     supervisor_save_summaries_secs=None,
                     supervisor_save_summaries_steps=100,
                     feed_fn=None,
                     steps=None,
                     fail_on_nan_loss=True,
                     hooks=None,
                     max_steps=None):
    """Train a model via monitored_session.

    Given `graph`, a directory to write outputs to (`output_dir`), and some
    ops, run a training loop. The given `train_op` performs one step of
    training on the model. The `loss_op` represents the objective function of
    the training. It is expected to increment the `global_step_tensor`, a
    scalar integer tensor counting training steps.

    This function runs the loop inside a `MonitoredSession`. On the chief the
    session creator is pointed at `output_dir` as its checkpoint directory,
    and hooks are installed to count steps, write summaries and write
    checkpoints into `output_dir`. Non-chief workers get none of these hooks.

    Training continues until the installed `StopAtStepHook` (built from
    `steps` / `max_steps`) or another hook requests a stop. A `NanTensorHook`
    watches `loss_op`; see `fail_on_nan_loss`.

    Args:
        graph: A graph to train. It is expected that this graph is not in use
            elsewhere.
        output_dir: A directory to write outputs to.
        train_op: An op that performs one training step when run.
        loss_op: A scalar loss tensor.
        global_step_tensor: A tensor representing the global step. If none is
            given, one is extracted from the graph.
        init_op: An op that initializes the graph. Passed to the `Scaffold`.
        init_feed_dict: A dictionary that maps `Tensor` objects to feed
            values. This feed dictionary will be used when `init_op` is
            evaluated.
        init_fn: Optional callable passed to the `Scaffold` to initialize the
            model.
        log_every_steps: Output logs regularly. The logs contain the current
            loss and step. A `0` or negative value disables logging.
        supervisor_is_chief: Whether the current process is the chief in
            charge of restoring the model and running standard services.
        supervisor_master: The master string to use when preparing the
            session.
        supervisor_save_model_secs: Save checkpoints every this many seconds.
            Can not be specified with `supervisor_save_model_steps`.
        supervisor_save_model_steps: Save checkpoints every this many steps.
            Can not be specified with `supervisor_save_model_secs`.
            Exactly one of `supervisor_save_model_steps` and
            `supervisor_save_model_secs` should be specified, and the other
            should be None.
        keep_checkpoint_max: The maximum number of recent checkpoint files to
            keep. As new files are created, older files are deleted. If None
            or 0, all checkpoint files are kept. This is simply passed as the
            max_to_keep arg to `tf.Saver` constructor.
        supervisor_save_summaries_secs: Save summaries every
            `supervisor_save_summaries_secs` seconds when training.
        supervisor_save_summaries_steps: Save summaries every
            `supervisor_save_summaries_steps` steps when training.
        feed_fn: A function that is called every iteration to produce a
            `feed_dict` passed to `session.run` calls. Optional.
        steps: Trains for this many steps (e.g. current global step +
            `steps`).
        fail_on_nan_loss: If true, raise `NanLossDuringTrainingError` if
            `loss_op` evaluates to `NaN`. If false, continue training as if
            nothing happened.
        hooks: List of `SessionRunHook` subclass instances. Used for
            callbacks inside the training loop.
        max_steps: Number of total steps for which to train model. If `None`,
            train forever. Two calls fit(steps=100) means 200 training
            iterations. On the other hand two calls of fit(max_steps=100)
            means, second call will not do any iteration since first call did
            all 100 steps.

    Returns:
        The final loss value, or `None` when training is skipped because the
        global step saved in `output_dir` has already reached `max_steps`.

    Raises:
        ValueError: If `output_dir`, `train_op`, `loss_op`, or
            `global_step_tensor` is not provided, or if `hooks` is not a
            list.
        NanLossDuringTrainingError: If `fail_on_nan_loss` is `True`, and loss
            ever evaluates to `NaN`.
        ValueError: If both `steps` and `max_steps` are not `None`.
    """
    if (steps is not None) and (max_steps is not None):
        raise ValueError('Can not provide both steps and max_steps.')
    if not output_dir:
        raise ValueError(
            'Output directory should be non-empty %s.' % output_dir)
    if train_op is None:
        raise ValueError('Missing train_op.')
    if loss_op is None:
        raise ValueError('Missing loss_op.')
    if hooks is None:
        hooks = []
    if not isinstance(hooks, list):
        raise ValueError('Hooks should be a list.')
    with graph.as_default():
        global_step_tensor = contrib_variables.assert_or_get_global_step(
            graph, global_step_tensor)
    if global_step_tensor is None:
        raise ValueError(
            'No "global_step" was provided or found in the graph.')

    if max_steps is not None:
        # Best effort: if the saved global step cannot be read, simply go on
        # and train. Only `Exception` is swallowed so that KeyboardInterrupt
        # and SystemExit still propagate.
        try:
            start_step = load_variable(output_dir, global_step_tensor.name)
        except Exception:  # pylint: disable=broad-except
            pass
        else:
            if max_steps <= start_step:
                logging.info(
                    'Skipping training since max_steps has already saved.')
                return None

    # Adapted SessionRunHooks such as ExportMonitor depend on the
    # CheckpointSaverHook to be executed before they should be executed.
    # The `hooks` param comprises of deprecated monitor hooks
    # (such as ExportMonitor). Appending them after the
    # basic_session_run_hooks.
    all_hooks = []
    with graph.as_default():
        all_hooks.append(basic_session_run_hooks.NanTensorHook(
            loss_op, fail_on_nan_loss=fail_on_nan_loss))
        if log_every_steps > 0:
            all_hooks.append(basic_session_run_hooks.LoggingTensorHook({
                'loss': loss_op.name,
                'step': global_step_tensor.name
            }, every_n_iter=log_every_steps))

        def make_saver():
            """Build the default saver used when the graph has none."""
            return tf_saver.Saver(
                sharded=True,
                max_to_keep=keep_checkpoint_max,
                defer_build=True,
                write_version=saver_pb2.SaverDef.V1)

        scaffold = monitored_session.Scaffold(
            init_op=init_op,
            init_feed_dict=init_feed_dict,
            init_fn=init_fn,
            saver=monitored_session.Scaffold.get_or_default(
                'saver', ops.GraphKeys.SAVERS, make_saver))

        if not supervisor_is_chief:
            session_creator = monitored_session.WorkerSessionCreator(
                scaffold=scaffold,
                master=supervisor_master)
        else:
            session_creator = monitored_session.ChiefSessionCreator(
                scaffold=scaffold,
                checkpoint_dir=output_dir,
                master=supervisor_master)
            # Step counter and summaries share one cached writer.
            summary_writer = summary_io.SummaryWriterCache.get(output_dir)
            all_hooks.append(
                basic_session_run_hooks.StepCounterHook(
                    summary_writer=summary_writer))
            all_hooks.append(
                basic_session_run_hooks.SummarySaverHook(
                    save_secs=supervisor_save_summaries_secs,
                    save_steps=supervisor_save_summaries_steps,
                    summary_writer=summary_writer,
                    scaffold=scaffold))
            if (supervisor_save_model_secs is not None
                    or supervisor_save_model_steps is not None):
                all_hooks.append(
                    basic_session_run_hooks.CheckpointSaverHook(
                        output_dir,
                        save_secs=supervisor_save_model_secs,
                        save_steps=supervisor_save_model_steps,
                        scaffold=scaffold))

        if steps is not None or max_steps is not None:
            all_hooks.append(
                basic_session_run_hooks.StopAtStepHook(steps, max_steps))
        # Caller hooks go last, after the checkpoint saver (see above).
        all_hooks.extend(hooks)

        with monitored_session.MonitoredSession(
                session_creator=session_creator,
                hooks=all_hooks) as super_sess:
            loss = None
            while not super_sess.should_stop():
                _, loss = super_sess.run(
                    [train_op, loss_op], feed_fn() if feed_fn else None)

        summary_io.SummaryWriterCache.clear()
        return loss
def _infer_model(self,
                 mode,
                 input_fn=None,
                 predict_keys=None,
                 hooks=None,
                 checkpoint_path=None):
    """Yields predictions, one example at a time, for an inference mode.

    A fresh graph is built from `input_fn` and the model function, the
    checkpoint is loaded through a `ChiefSessionCreator`, and the prediction
    tensors are evaluated repeatedly until the monitored session asks to
    stop.

    Args:
        mode: The inference to use, possible values: PREDICT, GENERATE,
            ENCODE.
        input_fn: Input function returning features which is a dictionary of
            string feature name to `Tensor` or `SparseTensor`. If it returns
            a tuple, first item is extracted as features. Prediction
            continues until `input_fn` raises an end-of-input exception
            (`OutOfRangeError` or `StopIteration`).
        predict_keys: list of `str`, name of the keys to predict. It is used
            if the `EstimatorSpec.predictions` is a `dict`. If `predict_keys`
            is used then rest of the predictions will be filtered from the
            dictionary. If `None`, returns all.
        hooks: List of `SessionRunHook` subclass instances. Used for
            callbacks inside the prediction call.
        checkpoint_path: Path of a specific checkpoint to predict. If `None`,
            the latest checkpoint in `model_dir` is used.

    Yields:
        Evaluated values of `predictions` tensors: each element of the
        evaluated batch when predictions are not a `dict`, otherwise one
        `dict` per batch row holding that row of every key.

    Raises:
        ValueError: Could not find a trained model in model_dir.
        ValueError: if batch length of predictions are not same.
        ValueError: If there is a conflict between `predict_keys` and
            `predictions`. For example if `predict_keys` is not `None` but
            `EstimatorSpec.predictions` is not a `dict`.
    """
    hooks = self._check_hooks(hooks)

    # Fall back to the newest checkpoint in model_dir when none was given,
    # and refuse to go on if there is still nothing to load.
    checkpoint_path = (
        checkpoint_path or saver.latest_checkpoint(self._model_dir))
    if not checkpoint_path:
        raise ValueError(
            'Could not find trained model in model_dir: {}.'.format(
                self._model_dir))

    with ops.Graph().as_default() as graph:
        random_seed.set_random_seed(self._config.tf_random_seed)
        self._create_and_assert_global_step(graph)
        features = self._get_features_from_input_fn(input_fn, mode)
        spec = self._call_model_fn(features, None, mode)
        predictions = self._extract_keys(spec.predictions, predict_keys)

        creator = monitored_session.ChiefSessionCreator(
            checkpoint_filename_with_path=checkpoint_path,
            scaffold=spec.scaffold,
            config=self._session_config)
        with monitored_session.MonitoredSession(
                session_creator=creator, hooks=hooks) as session:
            while not session.should_stop():
                evaluated = session.run(predictions)
                if isinstance(predictions, dict):
                    # Split the dict of batched arrays into per-row dicts.
                    batch_length = extract_batch_length(evaluated)
                    for row in xrange(batch_length):
                        yield {
                            name: column[row]
                            for name, column in six.iteritems(evaluated)
                        }
                else:
                    for prediction in evaluated:
                        yield prediction
def train(train_op,
          logdir,
          master='',
          is_chief=True,
          scaffold=None,
          hooks=None,
          chief_only_hooks=None,
          save_checkpoint_secs=600,
          save_summaries_steps=100,
          config=None):
    """Runs the training loop.

    The `hooks` list passed by the caller is copied before the chief-only and
    default hooks are added, so the caller's list is left unchanged.

    Args:
        train_op: A `Tensor` that, when executed, will apply the gradients
            and return the loss value.
        logdir: The directory where the graph and checkpoints are saved.
        master: The URL of the master.
        is_chief: Specifies whether or not the training is being run by the
            primary replica during replica training.
        scaffold: A tf.train.Scaffold instance.
        hooks: List of `tf.train.SessionRunHook` callbacks which are run
            inside the training loop.
        chief_only_hooks: List of `tf.train.SessionRunHook` instances which
            are run inside the training loop for the chief trainer only.
        save_checkpoint_secs: The frequency, in seconds, that a checkpoint is
            saved using a default checkpoint saver. If `save_checkpoint_secs`
            is set to `None`, then the default checkpoint saver isn't used.
        save_summaries_steps: The frequency, in number of global steps, that
            the summaries are written to disk using a default summary saver.
            If `save_summaries_steps` is set to `None`, then the default
            summary saver isn't used.
        config: An instance of `tf.ConfigProto`.

    Returns:
        The value of the loss function after training (the result of the last
        `train_op` run, or `None` if it never ran).

    Raises:
        ValueError: if this is the chief, `logdir` is `None` and either
            `save_checkpoint_secs` or `save_summaries_steps` is set.
    """
    # TODO(nsilberman): move this logic into monitored_session.py
    scaffold = scaffold or monitored_session.Scaffold()

    # Copy: hooks are added below and must not leak into the caller's list.
    hooks = list(hooks) if hooks else []

    if is_chief:
        session_creator = monitored_session.ChiefSessionCreator(
            scaffold=scaffold,
            checkpoint_dir=logdir,
            master=master,
            config=config)

        if chief_only_hooks:
            hooks.extend(chief_only_hooks)

        hooks.append(
            basic_session_run_hooks.StepCounterHook(output_dir=logdir))

        if save_summaries_steps:
            if logdir is None:
                raise ValueError(
                    'logdir cannot be None when save_summaries_steps is '
                    'not None')
            hooks.append(
                basic_session_run_hooks.SummarySaverHook(
                    scaffold=scaffold,
                    save_steps=save_summaries_steps,
                    output_dir=logdir))

        if save_checkpoint_secs:
            if logdir is None:
                raise ValueError(
                    'logdir cannot be None when save_checkpoint_secs is '
                    'not None')
            hooks.append(
                basic_session_run_hooks.CheckpointSaverHook(
                    logdir,
                    save_secs=save_checkpoint_secs,
                    scaffold=scaffold))
    else:
        session_creator = monitored_session.WorkerSessionCreator(
            scaffold=scaffold, master=master, config=config)

    with monitored_session.MonitoredSession(
            session_creator=session_creator, hooks=hooks) as session:
        loss = None
        while not session.should_stop():
            loss = session.run(train_op)
    return loss
def evaluate_repeatedly(checkpoint_dir,
                        master='',
                        scaffold=None,
                        eval_ops=None,
                        feed_dict=None,
                        final_ops=None,
                        final_ops_feed_dict=None,
                        eval_interval_secs=60,
                        hooks=None,
                        config=None,
                        max_number_of_evaluations=None,
                        timeout=None,
                        timeout_fn=None):
    """Repeatedly searches for a checkpoint in `checkpoint_dir` and evaluates it.

    During a single evaluation, the `eval_ops` is run until the session is
    interrupted or requested to finish. This is typically requested via a
    `tf.contrib.training.StopAfterNEvalsHook` which results in `eval_ops`
    running the requested number of times.

    Optionally, a user can pass in `final_ops`, a single `Tensor`, a list of
    `Tensors` or a dictionary from names to `Tensors`. The `final_ops` is
    evaluated a single time after `eval_ops` has finished running and the
    fetched values of `final_ops` are returned. If `final_ops` is left as
    `None`, then `None` is returned.

    One may also consider using a `tf.contrib.training.SummaryAtEndHook` to
    record summaries after the `eval_ops` have run. If `eval_ops` is `None`,
    the summaries run immediately after the model checkpoint has been
    restored.

    Note that `evaluate_repeatedly` creates a local variable used to track
    the number of evaluations run via
    `tf.contrib.training.get_or_create_eval_step`. Consequently, if a custom
    local init op is provided via a `scaffold`, the caller should ensure that
    the local init op also initializes the eval step.

    The `hooks` list and an `eval_ops` dictionary passed by the caller are
    copied before being extended, so the caller's objects are left unchanged.

    Args:
        checkpoint_dir: The directory where checkpoints are stored.
        master: The BNS address of the TensorFlow master.
        scaffold: A tf.train.Scaffold instance for initializing variables and
            restoring variables. Note that `scaffold.init_fn` is used by the
            function to restore the checkpoint. If you supply a custom
            init_fn, then it must also take care of restoring the model from
            its checkpoint.
        eval_ops: A single `Tensor`, a list of `Tensors` or a dictionary of
            names to `Tensors`, which is run until the session is requested
            to stop, commonly done by a
            `tf.contrib.training.StopAfterNEvalsHook`.
        feed_dict: The feed dictionary to use when executing the `eval_ops`.
        final_ops: A single `Tensor`, a list of `Tensors` or a dictionary of
            names to `Tensors`.
        final_ops_feed_dict: A feed dictionary to use when evaluating
            `final_ops`.
        eval_interval_secs: The minimum number of seconds between
            evaluations.
        hooks: List of `tf.train.SessionRunHook` callbacks which are run
            inside the evaluation loop.
        config: An instance of `tf.ConfigProto` that will be used to
            configure the `Session`. If left as `None`, the default will be
            used.
        max_number_of_evaluations: The maximum times to run the evaluation.
            If left as `None`, then evaluation runs indefinitely.
        timeout: The maximum amount of time to wait between checkpoints. If
            left as `None`, then the process will wait indefinitely.
        timeout_fn: Optional function to call after a timeout. If the
            function returns True, then it means that no new checkpoints will
            be generated and the iterator will exit. The function is called
            with no arguments.

    Returns:
        The fetched values of `final_ops` or `None` if `final_ops` is `None`.
    """
    eval_step = get_or_create_eval_step()

    # Prepare the run hooks on a copy so the caller's list is not extended.
    hooks = list(hooks) if hooks else []

    if eval_ops is not None:
        # Every run of `eval_ops` also bumps the evaluation counter.
        update_eval_step = state_ops.assign_add(eval_step, 1)

        # Hand the counter tensor to any StopAfterNEvalsHook.
        for h in hooks:
            if isinstance(h, StopAfterNEvalsHook):
                h._set_evals_completed_tensor(update_eval_step)  # pylint: disable=protected-access

        if isinstance(eval_ops, dict):
            # Work on a copy so the caller's dict does not gain a key.
            eval_ops = dict(eval_ops)
            eval_ops['update_eval_step'] = update_eval_step
        elif isinstance(eval_ops, (tuple, list)):
            eval_ops = list(eval_ops) + [update_eval_step]
        else:
            eval_ops = [eval_ops, update_eval_step]

    final_ops_hook = basic_session_run_hooks.FinalOpsHook(
        final_ops, final_ops_feed_dict)
    hooks.append(final_ops_hook)

    num_evaluations = 0
    for checkpoint_path in checkpoints_iterator(
            checkpoint_dir,
            min_interval_secs=eval_interval_secs,
            timeout=timeout,
            timeout_fn=timeout_fn):

        # A new session is created for every checkpoint the iterator yields.
        session_creator = monitored_session.ChiefSessionCreator(
            scaffold=scaffold,
            checkpoint_filename_with_path=checkpoint_path,
            master=master,
            config=config)

        with monitored_session.MonitoredSession(
                session_creator=session_creator, hooks=hooks) as session:
            logging.info('Starting evaluation at %s',
                         time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))
            if eval_ops is not None:
                while not session.should_stop():
                    session.run(eval_ops, feed_dict)

            logging.info('Finished evaluation at %s',
                         time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))
        num_evaluations += 1

        if (max_number_of_evaluations is not None and
                num_evaluations >= max_number_of_evaluations):
            return final_ops_hook.final_ops_values

    return final_ops_hook.final_ops_values
def class_visualisation(label, learning_rate, checkpoint_dir,
                        num_iterations=500, l2_reg=0.001, blur_every=10,
                        max_jitter=16, show_every=50, clip_percentile=20):
    """Visualise a class with gradient ascent on the input image.

    Rebuilds the image + text classification graph, restores the latest
    checkpoint found in `checkpoint_dir`, and then repeatedly moves a
    randomly initialised image along the normalised gradient of the
    L2-regularised score of `label`. The text input is made of unknown
    words only. Intermediate images are displayed with matplotlib; nothing
    is returned.

    Parameters:
        label: Index of the class (column of the logits) to visualise.
        learning_rate: Step size of each gradient ascent update.
        checkpoint_dir: Directory containing the checkpoints saved during
            training; the latest one is restored.
        num_iterations: Number of gradient ascent steps.
        l2_reg: Weight of the squared L2 norm penalty on the image.
        blur_every: Blur the image every `blur_every` iterations. A falsy
            value (0 or None) disables blurring.
        max_jitter: Largest circular shift, in pixels and in either
            direction, applied to the image before each update.
        show_every: Display the image every `show_every` iterations. The
            image of the last iteration is always displayed. A falsy value
            (0 or None) displays the last iteration only.
        clip_percentile: After each update, values of the image below this
            percentile of the image values are set to zero.

    Raises:
        ValueError: If no checkpoint is found in `checkpoint_dir`.
    """
    # Resolve the checkpoint before building anything: without this check a
    # wrong directory yields `None`, the session falls back to freshly
    # initialised variables and an untrained model gets visualised silently.
    checkpoint_path = tf_saver.latest_checkpoint(checkpoint_dir)
    if checkpoint_path is None:
        raise ValueError(
            'No checkpoint found in directory %r' % (checkpoint_dir,))

    with tf.Graph().as_default():
        tf.logging.set_verbosity(tf.logging.INFO)

        image_size = inception_v1.default_image_size
        image = tf.placeholder(tf.float32, [1, image_size, image_size, 3])

        # Text model
        text_dir = 'text_model'
        emb_dir = 'embedding_weights'
        filename = 'glove.6B.50d.txt'
        vocabulary, embedding = _load_embedding_weights_glove(
            text_dir, emb_dir, filename)
        vocab_size, embedding_dim = embedding.shape
        word_to_id = dict(zip(vocabulary, range(vocab_size)))

        # Create text with only unknown words: every token carries the id
        # that is given to '<ukn>' below.
        text = tf.constant(
            np.ones((1, _POST_SIZE), dtype=np.int32) * vocab_size)

        im_features_size = 128
        # Create the model, use the default arg scope to configure the batch
        # norm parameters.
        # NOTE(review): is_training=True is kept from the original code even
        # though this is an inference-style use of the network; confirm it
        # is intentional before changing it, as it affects the results.
        with slim.arg_scope(inception_v1.inception_v1_arg_scope()):
            images_features, _ = inception_v1.inception_v1(
                image, num_classes=im_features_size, is_training=True)

        # Unknown words get one extra id, hence one extra embedding row.
        word_to_id['<ukn>'] = vocab_size
        vocab_size = len(word_to_id)
        nb_emotions = 6
        with tf.variable_scope('Text'):
            # Word embedding. No assign-from-placeholder op is built here:
            # the original one was never run in this function, the variable
            # is expected to be restored from the checkpoint.
            W_embedding = tf.get_variable(
                'W_embedding', [vocab_size, embedding_dim], trainable=False)
            input_embed = tf.nn.embedding_lookup(W_embedding, text)

            # Rescale the mean by the actual number of non-zero values.
            nb_finite = tf.reduce_sum(
                tf.cast(tf.not_equal(input_embed, 0.0), tf.float32), axis=1)
            # If a post has zero finite elements, replace nb_finite by 1
            nb_finite = tf.where(tf.equal(nb_finite, 0.0),
                                 tf.ones_like(nb_finite), nb_finite)
            h1 = tf.reduce_mean(input_embed, axis=1) * _POST_SIZE / nb_finite

            fc1_size = 2048
            # Fully connected layer
            W_fc1 = tf.get_variable('W_fc1', [embedding_dim, fc1_size])
            b_fc1 = tf.get_variable('b_fc1', [fc1_size])
            texts_features = tf.matmul(h1, W_fc1) + b_fc1
            texts_features = tf.nn.relu(texts_features)

        # NOTE(review): the source indentation was lost; the softmax
        # variables are assumed to live outside the 'Text' scope. Confirm
        # against the variable names stored in the checkpoint.
        # Concatenate image and text features
        concat_features = tf.concat([images_features, texts_features], axis=1)

        W_softmax = tf.get_variable(
            'W_softmax', [im_features_size + fc1_size, nb_emotions])
        b_softmax = tf.get_variable('b_softmax', [nb_emotions])
        logits = tf.matmul(concat_features, W_softmax) + b_softmax

        # Objective: score of the requested class minus an L2 penalty that
        # keeps the image values from growing without bound.
        class_score = logits[:, label]
        regularisation = l2_reg * tf.square(tf.norm(image))
        obj_function = class_score - regularisation
        grad_obj_function = tf.gradients(obj_function, image)[0]
        grad_normalized = grad_obj_function / tf.norm(grad_obj_function)

        # Initialise image
        image_init = tf.random_normal([image_size, image_size, 3])
        image_init = inception_preprocessing.preprocess_image(
            image_init, image_size, image_size, is_training=False)
        image_init = tf.expand_dims(image_init, 0)

        # Load model
        scaffold = monitored_session.Scaffold(
            init_op=None, init_feed_dict=None, init_fn=None, saver=None)
        session_creator = monitored_session.ChiefSessionCreator(
            scaffold=scaffold,
            checkpoint_filename_with_path=checkpoint_path,
            master='',
            config=None)

        with monitored_session.MonitoredSession(
                session_creator=session_creator, hooks=None) as session:
            np_image = session.run(image_init)
            last_iteration = num_iterations - 1
            for i in range(num_iterations):
                # Randomly jitter the image a bit
                ox, oy = np.random.randint(-max_jitter, max_jitter + 1, 2)
                np_image = np.roll(np.roll(np_image, ox, 1), oy, 2)

                # Update image
                grad_update = session.run(grad_normalized,
                                          feed_dict={image: np_image})
                np_image += learning_rate * grad_update

                # Undo the jitter
                np_image = np.roll(np.roll(np_image, -ox, 1), -oy, 2)

                # As a regularizer, zero the lowest values and periodically
                # blur.
                # NOTE(review): the threshold is applied to the raw values,
                # not to their magnitude; confirm this is the intent.
                threshold = np.percentile(np_image, clip_percentile)
                np_image[np_image < threshold] = 0.0
                if blur_every and i % blur_every == 0:
                    np_image = blur_image(np_image, sigma=0.5)

                periodic_show = bool(show_every) and i % show_every == 0
                if periodic_show or i == last_iteration:
                    plt.imshow(deprocess_image(np_image[0]))
                    plt.title('Iteration %d / %d' % (i + 1, num_iterations))
                    plt.gcf().set_size_inches(4, 4)
                    plt.axis('off')
                    plt.show()