# Example 1 (scraped marker, originally "Esempio n. 1")
# 0
def next_undecoded_checkpoint(model_dir, timeout_mins=240):
    """Yields successive checkpoints from model_dir."""
    last_ckpt = None
    last_step = 0
    while True:
        # Block until a checkpoint newer than last_ckpt shows up (or we
        # hit the timeout, in which case last_ckpt comes back as None).
        last_ckpt = contrib.training().wait_for_new_checkpoint(
            model_dir,
            last_ckpt,
            seconds_to_sleep=60,
            timeout=60 * timeout_mins)
        # Enumerate every checkpoint currently recorded in the model dir,
        # paired with its global step (basename looks like "model.ckpt-1234").
        state = tf.train.get_checkpoint_state(model_dir)
        stepped = [(int(os.path.basename(path).split("-")[1]), path)
                   for path in state.all_model_checkpoint_paths]
        # The next checkpoint to eval is the one with the smallest step
        # strictly greater than the last step we handed out.
        newer = [(step, path) for step, path in stepped if step > last_step]
        if newer:
            ckpt_step, next_ckpt = min(newer, key=lambda item: item[0])
        else:
            next_ckpt = None

        # Timed out and nothing left to evaluate: stop iterating.
        if last_ckpt is None and next_ckpt is None:
            tf.logging.info("Eval timeout: no new checkpoints within %dm" %
                            timeout_mins)
            break

        if next_ckpt is not None:
            last_step = ckpt_step
            last_ckpt = next_ckpt

        yield last_ckpt
    def test_model_shapes(self):
        """Test a few of the important output shapes for NeuralStackModel."""
        batch_size, seq_length = 100, 80
        embedding_size, vocab_size = 64, 128

        # Build minimal hparams for the model and its problem.
        hparams = neural_stack.neural_stack()
        problem_hparams = contrib.training().HParams()
        problem_hparams.add_hparam("modality", {
            "inputs": modalities.ModalityType.SYMBOL,
            "targets": modalities.ModalityType.SYMBOL,
        })
        problem_hparams.add_hparam("vocab_size", {
            "inputs": vocab_size,
            "targets": vocab_size,
        })
        model = neural_stack.NeuralStackModel(
            hparams, problem_hparams=problem_hparams)

        features = {
            "inputs": tf.ones([batch_size, seq_length, 1, 1], dtype=tf.int32),
            "targets": tf.ones([batch_size, seq_length, 1, 1], dtype=tf.int32)
        }

        # Both the embedded inputs and the body output should carry the
        # same (batch, time, 1, embedding) shape.
        expected_shape = [batch_size, seq_length, 1, embedding_size]

        transformed_features = model.bottom(features)
        self.assertEqual(expected_shape, transformed_features["inputs"].shape)

        logits = model.body(transformed_features)
        self.assertEqual(expected_shape, logits.shape)
# Example 3 (scraped marker, originally "Esempio n. 3")
# 0
def next_checkpoint(model_dir, timeout_mins=240):
    """Yields successive checkpoints from model_dir.

    Args:
      model_dir: The directory in which checkpoints are saved.
      timeout_mins: The maximum amount of time in minutes to wait
        between checkpoints. Set this to -1 to wait indefinitely.

    Yields:
      last_ckpt: a new checkpoint path, or None if the timeout was reached.
    """
    # -1 means "wait forever", which wait_for_new_checkpoint spells as None.
    timeout_secs = None if timeout_mins == -1 else timeout_mins * 60
    ckpt = None
    while True:
        ckpt = contrib.training().wait_for_new_checkpoint(
            model_dir, ckpt, seconds_to_sleep=60, timeout=timeout_secs)

        # A None result means the wait timed out; end the generator.
        if ckpt is None:
            tf.logging.info("Eval timeout: no new checkpoints within %dm" %
                            timeout_mins)
            return

        yield ckpt