def create_session(checkpoint_path, n_cpu_threads=-1):
    """Creates a MonitoredSession that restores from a checkpoint.

    Args:
        checkpoint_path (string): Path either to a checkpoint directory or
            directly to a checkpoint file. For a directory, the latest
            checkpoint reported by ``saver.latest_checkpoint`` is used.
        n_cpu_threads (int): Number of CPU threads, forwarded to
            ``session_config``. If negative, we assume either GPU decoding
            or that all CPU cores can be used.

    Returns:
        A TensorFlow MonitoredSession.

    Raises:
        AttributeError: If TensorFlow raises ``NotFoundError`` while the
            session is being created.
    """
    try:
        if os.path.isdir(checkpoint_path):
            checkpoint_dir = checkpoint_path
            checkpoint_path = saver.latest_checkpoint(checkpoint_dir)
            if not checkpoint_path:
                # latest_checkpoint yields a falsy value when the directory
                # holds no checkpoint. Without this message the session
                # creator below would receive no checkpoint path and the
                # problem would go unnoticed.
                logging.warning(
                    "No checkpoint found in directory %s; the session will "
                    "be created without a checkpoint to restore.",
                    checkpoint_dir)
        else:
            logging.info("%s is not a directory. Interpreting as direct "
                         "path to checkpoint...", checkpoint_path)
        return training.MonitoredSession(
            session_creator=training.ChiefSessionCreator(
                checkpoint_filename_with_path=checkpoint_path,
                config=session_config(n_cpu_threads)))
    except tf.errors.NotFoundError:
        logging.fatal(
            "Could not find all variables of the computation "
            "graph in the T2T checkpoint file. This means that the "
            "checkpoint does not correspond to the model specified in "
            "SGNMT. Please double-check pred_src_vocab_size, "
            "pred_trg_vocab_size, and all the t2t_* parameters. "
            "Also make sure that the checkpoint exists and is readable")
        raise AttributeError("Could not initialize TF session.")
def create_session(self, checkpoint_dir):
    """Builds a MonitoredSession from the newest checkpoint in a directory.

    Args:
        checkpoint_dir: Directory handed to ``saver.latest_checkpoint`` to
            look up the checkpoint to restore.

    Returns:
        A ``training.MonitoredSession`` whose chief session creator uses
        the looked-up checkpoint and ``self._session_config()``.
    """
    newest_checkpoint = saver.latest_checkpoint(checkpoint_dir)
    creator = training.ChiefSessionCreator(
        checkpoint_filename_with_path=newest_checkpoint,
        config=self._session_config())
    return training.MonitoredSession(session_creator=creator)
def _create_session(self):
    """Returns a MonitoredSession for restoring the model.

    The checkpoint is the latest one found under
    ``self.config.output_path``; the session configuration comes from
    ``self._session_config()``.
    """
    restore_from = saver.latest_checkpoint(self.config.output_path)
    session_creator = training.ChiefSessionCreator(
        checkpoint_filename_with_path=restore_from,
        config=self._session_config(),
    )
    session = training.MonitoredSession(session_creator=session_creator)
    return session
def predict(self, input_fn, predict_keys=None, hooks=None):
    """Returns predictions for given features.

    Args:
        input_fn: Input function returning features which is a dictionary
            of string feature name to `Tensor` or `SparseTensor`. If it
            returns a tuple, first item is extracted as features.
            Prediction continues until `input_fn` raises an end-of-input
            exception (`OutOfRangeError` or `StopIteration`).
        predict_keys: list of `str`, name of the keys to predict. It is
            used if the `EstimatorSpec.predictions` is a `dict`. If
            `predict_keys` is used then rest of the predictions will be
            filtered from the dictionary. If `None`, returns all.
        hooks: List of `SessionRunHook` subclass instances. Used for
            callbacks inside the prediction call.

    Yields:
        Evaluated values of `predictions` tensors, one example at a time.

    Raises:
        ValueError: Could not find a trained model in model_dir.
        ValueError: if batch length of predictions are not same.
        ValueError: If there is a conflict between `predict_keys` and
            `predictions`. For example if `predict_keys` is not `None` but
            `EstimatorSpec.predictions` is not a `dict`.
    """
    hooks = list(hooks or [])
    # Check that model has been trained.
    checkpoint_path = saver.latest_checkpoint(self._model_dir)
    if not checkpoint_path:
        raise ValueError('Could not find trained model in model_dir: {}.'.format(
            self._model_dir))

    with ops.Graph().as_default() as g:
        random_seed.set_random_seed(self._config.tf_random_seed)
        training.create_global_step(g)
        features = self._get_features_from_input_fn(input_fn)
        # The model_fn must be told it is building a prediction graph.
        # Passing the training mode key here would make it build the
        # training variant of the model, even though no labels are given.
        estimator_spec = self._call_model_fn(features, None,
                                             model_fn_lib.ModeKeys.PREDICT)
        predictions = self._extract_keys(estimator_spec.predictions,
                                         predict_keys)
        with training.MonitoredSession(
                session_creator=training.ChiefSessionCreator(
                    checkpoint_filename_with_path=checkpoint_path,
                    scaffold=estimator_spec.scaffold,
                    config=config_pb2.ConfigProto(allow_soft_placement=True)),
                hooks=hooks) as mon_sess:
            while not mon_sess.should_stop():
                preds_evaluated = mon_sess.run(predictions)
                if not isinstance(predictions, dict):
                    # Non-dict predictions: iterate the evaluated batch and
                    # yield its elements one by one.
                    for pred in preds_evaluated:
                        yield pred
                else:
                    # Dict predictions: slice every value at the same index
                    # so each yielded dict describes a single example.
                    for i in range(self._extract_batch_length(preds_evaluated)):
                        yield {
                            key: value[i]
                            for key, value in six.iteritems(preds_evaluated)
                        }
def _create_session(self):
    """Creates a MonitoredSession for this predictor.

    The session restores from the latest checkpoint found in
    ``self._checkpoint_dir`` and is configured via
    ``self._session_config()``.

    Returns:
        A TensorFlow MonitoredSession.

    Raises:
        AttributeError: If TensorFlow raises ``NotFoundError`` while the
            session is being created.
    """
    try:
        checkpoint_path = saver.latest_checkpoint(self._checkpoint_dir)
        if not checkpoint_path:
            # latest_checkpoint yields a falsy value when the directory
            # holds no checkpoint; surface that instead of silently
            # passing it on to the session creator.
            logging.warning(
                "No checkpoint found in directory %s; the session will "
                "be created without a checkpoint to restore.",
                self._checkpoint_dir)
        return training.MonitoredSession(
            session_creator=training.ChiefSessionCreator(
                checkpoint_filename_with_path=checkpoint_path,
                config=self._session_config()))
    except tf.errors.NotFoundError:
        logging.fatal(
            "Could not find all variables of the computation "
            "graph in the MoE checkpoint file. This means that the "
            "checkpoint does not correspond to the model specification.")
        raise AttributeError("Could not initialize TF session for MoE.")
def create_session(self):
    """Creates a MonitoredSession for this predictor.

    The session restores from the latest checkpoint found in
    ``self._checkpoint_dir`` and is configured via
    ``self._session_config()``.

    Returns:
        A TensorFlow MonitoredSession.

    Raises:
        AttributeError: If TensorFlow raises ``NotFoundError`` while the
            session is being created.
    """
    try:
        checkpoint_path = saver.latest_checkpoint(self._checkpoint_dir)
        if not checkpoint_path:
            # latest_checkpoint yields a falsy value when the directory
            # holds no checkpoint; surface that instead of silently
            # passing it on to the session creator.
            logging.warning(
                "No checkpoint found in directory %s; the session will "
                "be created without a checkpoint to restore.",
                self._checkpoint_dir)
        return training.MonitoredSession(
            session_creator=training.ChiefSessionCreator(
                checkpoint_filename_with_path=checkpoint_path,
                config=self._session_config()))
    except tf.errors.NotFoundError:
        logging.fatal(
            "Could not find all variables of the computation "
            "graph in the T2T checkpoint file. This means that the "
            "checkpoint does not correspond to the model specified in "
            "SGNMT. Please double-check pred_src_vocab_size, "
            "pred_trg_vocab_size, and all the t2t_* parameters.")
        raise AttributeError("Could not initialize TF session.")
def create_session():
    """Creates a MonitoredSession for this predictor.

    The checkpoint location is read from ``FLAGS.checkpoint_path``. A
    directory is resolved to its latest checkpoint; anything else is used
    as a direct path to a checkpoint.

    Returns:
        A TensorFlow MonitoredSession.

    Raises:
        AttributeError: If ``FLAGS.checkpoint_path`` is not set, or if
            TensorFlow raises ``NotFoundError`` while the session is being
            created.
    """
    if not FLAGS.checkpoint_path:
        raise AttributeError("Please set --checkpoint_path")
    try:
        if os.path.isdir(FLAGS.checkpoint_path):
            checkpoint_path = saver.latest_checkpoint(FLAGS.checkpoint_path)
            if not checkpoint_path:
                # latest_checkpoint yields a falsy value when the directory
                # holds no checkpoint; surface that instead of silently
                # passing it on to the session creator.
                tf.logging.warning(
                    "No checkpoint found in directory %s; the session will "
                    "be created without a checkpoint to restore.",
                    FLAGS.checkpoint_path)
        else:
            checkpoint_path = FLAGS.checkpoint_path
            tf.logging.info("%s is not a directory. Interpreting as direct "
                            "path to checkpoint...", checkpoint_path)
        return training.MonitoredSession(
            session_creator=training.ChiefSessionCreator(
                checkpoint_filename_with_path=checkpoint_path,
                config=session_config()))
    except tf.errors.NotFoundError:
        tf.logging.fatal("Could not find all variables of the computation "
                         "graph in the T2T checkpoint file. This means that the "
                         "checkpoint does not correspond to the specified model")
        raise AttributeError("Could not initialize TF session.")
def predict(self, input_fn, predict_keys=None, hooks=None,
            checkpoint_path=None, yield_single_examples=True):
    """Generates predictions for the features produced by `input_fn`.

    Args:
        input_fn: A function that constructs the features. Prediction runs
            until `input_fn` signals end of input (`OutOfRangeError` or
            `StopIteration`). It should construct and return one of:

            * A `tf.data.Dataset` whose outputs obey the constraints below.
            * features: a `Tensor` or a dict mapping string feature names
              to `Tensor`s, as expected by `model_fn`.
            * A tuple, whose first item is taken as the features.
        predict_keys: list of `str` naming the predictions to keep. Only
            meaningful when `EstimatorSpec.predictions` is a `dict`; all
            other entries are dropped. `None` keeps everything.
        hooks: List of `SessionRunHook` subclass instances, used for
            callbacks inside the prediction call.
        checkpoint_path: Path of a specific checkpoint to predict from.
            When falsy, the latest checkpoint in `model_dir` is used.
        yield_single_examples: If False, each evaluated batch is yielded
            whole instead of being split into individual examples. Useful
            when `model_fn` returns tensors whose first dimension is not
            the batch size.

    Yields:
        Evaluated values of `predictions` tensors.

    Raises:
        ValueError: Could not find a trained model in model_dir.
        ValueError: if batch length of predictions are not same and
            yield_single_examples is True.
        ValueError: If `predict_keys` conflicts with `predictions`, e.g.
            `predict_keys` is not `None` but `EstimatorSpec.predictions`
            is not a `dict`.
    """
    hooks = _check_hooks_type(hooks)

    # Fall back to the newest checkpoint in model_dir; the model must have
    # been trained for either source to yield a usable path.
    checkpoint_path = checkpoint_path or saver.latest_checkpoint(
        self._model_dir)
    if not checkpoint_path:
        raise ValueError('Could not find trained model in model_dir: {}.'.format(
            self._model_dir))

    with ops.Graph().as_default() as graph:
        random_seed.set_random_seed(self._config.tf_random_seed)
        self._create_and_assert_global_step(graph)
        features, input_hooks = self._get_features_from_input_fn(
            input_fn, model_fn_lib.ModeKeys.PREDICT)
        spec = self._call_model_fn(
            features, None, model_fn_lib.ModeKeys.PREDICT, self.config)
        predictions = self._extract_keys(spec.predictions, predict_keys)

        # Hook order: input hooks, then caller hooks, then model hooks.
        all_hooks = (list(input_hooks) + list(hooks) +
                     list(spec.prediction_hooks or []))

        creator = training.ChiefSessionCreator(
            checkpoint_filename_with_path=checkpoint_path,
            master=self._config.master,
            scaffold=spec.scaffold,
            config=self._session_config)
        with training.MonitoredSession(
                session_creator=creator, hooks=all_hooks) as mon_sess:
            while not mon_sess.should_stop():
                batch = mon_sess.run(predictions)
                if not yield_single_examples:
                    yield batch
                    continue
                if not isinstance(predictions, dict):
                    for single in batch:
                        yield single
                    continue
                batch_length = self._extract_batch_length(batch)
                for idx in range(batch_length):
                    yield {
                        name: column[idx]
                        for name, column in six.iteritems(batch)
                    }
def PlaceGraph(metagraph, cluster=None, allotted_time=3600, hparams=None,
               verbose=False):
    """Place the provided metagraph.

    Args:
        metagraph: the metagraph to place.
        cluster: an optional set of hardware resource to optimize the
            placement for. If none is specified, we'll optimize the
            placement for the hardware available on the local machine.
        allotted_time: the maximum amount to time in seconds to spend
            optimizing the placement.
        hparams: hyperparameters used to fine tune the placer. When None,
            `hierarchical_controller.hierarchical_controller_hparams()` is
            used. Note that `num_children` is overwritten to 1 on the
            object that is passed in.
        verbose: prints debug information if True.

    Returns:
        `metagraph`, after `export_placement` has been applied to it for
        every placement found to be faster than the original one.
    """
    if cluster is None:
        cluster = gcluster.Cluster()

    # Resolve the default hyperparameters first: the fallback path below
    # reads hparams.failing_signal when the original placement cannot be
    # measured, so hparams must not be None by then.
    if hparams is None:
        hparams = hierarchical_controller.hierarchical_controller_hparams()

    # Optimize the metagraph to speedup the placement
    rewriter_config = rewriter_config_pb2.RewriterConfig()
    rewriter_config.optimizers.append("pruning")
    rewriter_config.optimizers.append("constfold")
    rewriter_config.optimizers.append("arithmetic")
    rewriter_config.optimizers.append("dependency")
    rewriter_config.optimizers.append("pruning")
    optimized_graph = tf_optimizer.OptimizeGraph(
        rewriter_config, metagraph, verbose=verbose, cluster=cluster)
    optimized_metagraph = meta_graph_pb2.MetaGraphDef()
    optimized_metagraph.CopyFrom(metagraph)
    optimized_metagraph.graph_def.CopyFrom(optimized_graph)

    item = gitem.Item(optimized_metagraph)

    # Measure the runtime achievable with the original placement.
    try:
        _, original_run_time, _ = cluster.MeasureCosts(item)
        if verbose:
            print("Runtime for original placement: " + str(original_run_time))
    except errors.OpError as e:
        if verbose:
            print("Original placement isn't feasible: " + str(e))
        original_run_time = hparams.failing_signal

    # We run with a single child
    hparams.num_children = 1

    with tf_ops.Graph().as_default():
        # Place all the nodes of the controller on the CPU. We don't want them to
        # fight for accelerator memory with the model to optimize.
        with tf_ops.device("/device:CPU:0"):
            model = hierarchical_controller.HierarchicalController(
                hparams, item, cluster)
            model.build_controller()
            session_creator = training.ChiefSessionCreator()
            with training.MonitoredSession(
                    session_creator=session_creator) as sess:
                start_time = time.time()
                current_time = start_time
                # Keep sampling placements until the time budget is spent.
                while current_time - start_time < allotted_time:
                    grouping_actions = model.generate_grouping(sess)
                    input_to_seq2seq = model.create_group_embeddings(
                        grouping_actions, verbose=verbose)
                    model.generate_placement(input_to_seq2seq, sess)
                    try:
                        run_time = model.eval_placement(sess, verbose=verbose)
                    except errors.OpError as e:
                        if verbose:
                            print("Failed to run graph:" + str(e))
                        run_time = hparams.failing_signal
                    updated = model.update_reward(
                        sess, run_time, verbose=verbose)
                    if updated and run_time < original_run_time:
                        if verbose:
                            print("Found better placement, with runtime " +
                                  str(run_time))
                        model.export_placement(metagraph)

                    model.process_reward(sess)

                    current_time = time.time()

    return metagraph
def custom_predict(self, perturb, ranker, input_fn, predict_keys=None, hooks=None, checkpoint_path=None, yield_single_examples=True):
    """Yields predictions from `ranker` while also capturing intermediate tensors.

    Builds a fresh graph, calls the ranker's model_fn in PREDICT mode, and
    runs it in a MonitoredSession. Besides the predictions, every session
    run also evaluates several tensors stored on `self` and the
    gradient/variable pairs from `calculate_grad_var_pair(self)`.

    Args:
        perturb: Value fed to the boolean placeholder `self.perturb_on` on
            every session run.
        ranker: Estimator-like object providing the model directory,
            config, model_fn and session config used here.
        input_fn: Input function handed to
            `ranker._get_features_from_input_fn`.
        predict_keys: Accepted but never read in this function.
        hooks: Accepted but never read in this function; only the input
            hooks returned alongside the features are installed.
        checkpoint_path: Checkpoint to load. When falsy, the latest
            checkpoint in `ranker._model_dir` is used. If none is found,
            this is only logged and execution continues.
        yield_single_examples: If False, each evaluated batch is yielded
            whole; otherwise it is split into individual examples.

    Yields:
        Evaluated predictions, per batch or per example.

    Side effects:
        Sets `self.perturb_on`, `self.grad_variable_pair_tensor` and
        `self.grad_variable_pair_evaluated`. While `self.first_eval` is
        truthy, it also stores the `*_evaluated` feature and label values
        on `self` and then sets `self.first_eval` to False.
    """
    if not checkpoint_path:
        checkpoint_path = checkpoint_management.latest_checkpoint(
            ranker._model_dir)
    if not checkpoint_path:
        # Unlike a strict predict, a missing checkpoint is not an error here.
        logging.info(
            'Could not find trained model in model_dir: {}, running '
            'initialization to predict.'.format(ranker._model_dir))
    with tf.Graph().as_default() as g:
        # Boolean switch fed with `perturb` on every session run below.
        self.perturb_on = tf.compat.v1.placeholder(tf.bool)
        random_seed.set_random_seed(ranker._config.tf_random_seed)
        ranker._create_and_assert_global_step(g)
        features, input_hooks = ranker._get_features_from_input_fn(
            input_fn, ModeKeys.PREDICT)
        estimator_spec = ranker._call_model_fn(features, None,
                                               ModeKeys.PREDICT, ranker.config)
        # Call to warm_start has to be after model_fn is called.
        ranker._maybe_warm_start(checkpoint_path)
        # All predictions are kept; `predict_keys` is not applied.
        predictions = estimator_spec.predictions
        all_hooks = list(input_hooks)
        # No-op: extends with an empty list, so `hooks` is never added.
        all_hooks.extend(list([]))
        self.grad_variable_pair_tensor = calculate_grad_var_pair(self)
        with training.MonitoredSession(
                session_creator=training.ChiefSessionCreator(
                    checkpoint_filename_with_path=checkpoint_path,
                    master=ranker._config.master,
                    scaffold=estimator_spec.scaffold,
                    config=ranker._session_config),
                hooks=all_hooks) as mon_sess:
            while not mon_sess.should_stop():
                # The feature/label tensors read from `self` must already
                # exist at this point (presumably set while the model_fn
                # ran - TODO confirm against the caller).
                [
                    preds_evaluated,
                    temp_query_features_evaluated,
                    temp_answer_features_evaluated,
                    temp_embedded_features_evaluated,
                    temp_labels_evaluated,
                    temp_normalized_features_evaluated,
                    self.grad_variable_pair_evaluated,
                ] = mon_sess.run([
                    predictions,
                    self.query_features,
                    self.answer_features,
                    self.embedded_features_tensor,
                    self.labels_tensor,
                    self.normalized_features,
                    self.grad_variable_pair_tensor,
                ], {self.perturb_on: perturb})
                # Save values for tensors during first nonperturbed evaluation to be
                # used in next execution.
                if self.first_eval:
                    self.query_features_evaluated = temp_query_features_evaluated
                    self.answer_features_evaluated = temp_answer_features_evaluated
                    self.embedded_features_evaluated = temp_embedded_features_evaluated
                    self.labels_evaluated = temp_labels_evaluated
                    self.normalized_features_evaluated = temp_normalized_features_evaluated
                    self.first_eval = False
                if not yield_single_examples:
                    yield preds_evaluated
                elif not isinstance(predictions, dict):
                    for pred in preds_evaluated:
                        yield pred
                else:
                    # Dict predictions: slice every value at the same index
                    # so each yielded dict describes a single example.
                    for i in range(
                            self._extract_batch_length(preds_evaluated)):
                        yield {
                            key: value[i]
                            for key, value in six.iteritems(
                                preds_evaluated)
                        }
def predict_with_guide(self, input_fn, predict_keys=None, hooks=None,
                       checkpoint_path=None, latest_filename=None,
                       yield_single_examples=True):
    """Yields predictions while feeding features and labels via placeholders.

    The input pipeline is built in EVAL mode to obtain both features and
    labels. The model itself is built in PREDICT mode on placeholders that
    mirror those tensors, and a `FeedGuideHook` is installed with the
    placeholders, the original tensors and the model directory.

    Args:
        input_fn: Input function handed to
            `self._get_features_and_labels_from_input_fn`.
        predict_keys: Either a list of prediction names, to which the keys
            of the first model instance's `metrics_dict` are added, or
            None. For None, all predictions that are not in `metrics_dict`
            are used, plus the entries of `metrics_eval`. The caller's
            list is not modified.
        hooks: List of `SessionRunHook` instances, validated with
            `estimator_lib._check_hooks_type`.
        checkpoint_path: Forwarded to `self._checkpoint_path`.
        latest_filename: Forwarded to `self._checkpoint_path`.
        yield_single_examples: If False, each evaluated batch is yielded
            whole; otherwise it is split into individual examples.

    Yields:
        Evaluated predictions, per batch or per example.

    Raises:
        TypeError: If `predict_keys` is neither None nor a list.
    """
    hooks = estimator_lib._check_hooks_type(hooks)
    checkpoint_path = self._checkpoint_path(checkpoint_path, latest_filename)

    with ops.Graph().as_default() as g:
        random_seed.set_random_seed(self._config.tf_random_seed)
        self._create_and_assert_global_step(g)
        features, labels, input_hooks = self._get_features_and_labels_from_input_fn(
            input_fn, model_fn_lib.ModeKeys.EVAL)
        # Mirror every input tensor with a placeholder of the same dtype and
        # shape; the model is built on the placeholders, not the pipeline.
        features_ph = {
            key: array_ops.placeholder(value.dtype, value.shape, name=key)
            for key, value in features.items()
        }
        labels_ph = array_ops.placeholder(labels.dtype, labels.shape,
                                          name="labels")
        feed_guide_hook = FeedGuideHook(features_ph, labels_ph, features,
                                        labels, self.model_dir)

        estimator_spec = self._call_model_fn(features_ph, labels_ph,
                                             model_fn_lib.ModeKeys.PREDICT,
                                             self.config)

        if isinstance(predict_keys, list):
            # Build a new list instead of using `+=`, which would extend
            # the caller's list in place and grow it on every call.
            predict_keys = predict_keys + list(
                self.params["model_instances"][0].metrics_dict.keys())
        elif predict_keys is None:
            # Volume evaluation does not need the model's own metrics; the
            # metrics_eval entries are requested instead.
            predict_keys = [
                x for x in estimator_spec.predictions
                if x not in self.params["model_instances"][0].metrics_dict
            ]
            predict_keys.extend(
                list(self.params["model_instances"][0].metrics_eval))
        else:
            raise TypeError(
                "predict_keys must be None(for 3d eval) or a list(for 2d eval, "
                "for example [\"Names\", \"Indices\"])")
        predictions = self._extract_keys(estimator_spec.predictions,
                                         predict_keys)
        feed_guide_hook.predictions = predictions

        # Hook order: input hooks, the feed hook, caller hooks, model hooks.
        all_hooks = list(input_hooks) + [feed_guide_hook]
        all_hooks.extend(hooks)
        all_hooks.extend(list(estimator_spec.prediction_hooks or []))
        with training.MonitoredSession(
                session_creator=training.ChiefSessionCreator(
                    checkpoint_filename_with_path=checkpoint_path,
                    master=self._config.master,
                    scaffold=estimator_spec.scaffold,
                    config=self._session_config),
                hooks=all_hooks) as mon_sess:
            while not mon_sess.should_stop():
                preds_evaluated = mon_sess.run(predictions)
                if not yield_single_examples:
                    yield preds_evaluated
                elif not isinstance(predictions, dict):
                    for pred in preds_evaluated:
                        yield pred
                else:
                    # Dict predictions: slice every value at the same index
                    # so each yielded dict describes a single example.
                    for i in range(
                            self._extract_batch_length(preds_evaluated)):
                        yield {
                            key: value[i]
                            for key, value in six.iteritems(
                                preds_evaluated)
                        }