def _current_observation(self):
  observations = online_tune.history_to_observations(
      self._trainer.state.history,
      self._observation_metrics,
      self._observation_range,
      self._include_lr_in_observation,
  )
  assert observations.shape[0] > 0, "No values in history for any metric."
  return observations[-1, :]
def test_clips_observations(self):
  history = trax_history.History()
  self._append_metrics(history, ("eval", "loss"), [-10, 10])
  observations = online_tune.history_to_observations(
      history,
      metrics=(("eval", "loss"),),
      observation_range=(-2, 2),
      include_lr=False,
  )
  np.testing.assert_array_equal(observations, [[-2], [2]])
def test_clips_observations(self):
  history = trax_history.History()
  self._append_metrics(history, ("eval", "loss"), [-10, 10])
  observations = online_tune.history_to_observations(
      history,
      metrics=(("eval", "loss"),),
      observation_range=(-2, 2),
      control_configs=None,
  )
  # Values -10 and 10 clipped to the range (-2, 2) yield -2 and 2.
  np.testing.assert_array_equal(observations, [[-2], [2]])
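# A minimal sketch of the clipping step that the two tests above exercise,
# assuming history_to_observations stacks one column per (mode, metric) pair
# and clips the result to observation_range; the names below are made up for
# illustration, not the library's internals.
import numpy as np

def _sketch_history_to_observations(metric_columns, observation_range):
  (low, high) = observation_range
  columns = [np.asarray(values, dtype=float) for values in metric_columns]
  return np.clip(np.stack(columns, axis=1), low, high)

# E.g. one metric with values [-10, 10] clipped to (-2, 2):
# _sketch_history_to_observations([[-10, 10]], (-2, 2)) -> [[-2.], [2.]]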
def _current_observation(self):
  observations = online_tune.history_to_observations(
      self._trainer.state.history,
      self._observation_metrics,
      self._observation_range,
      self._control_configs if self._include_controls_in_observation else None,
  )
  assert observations.shape[0] > 0, "No values in history for any metric."
  return observations[-1, :]
def test_converts_history_to_observations_without_learning_rate(self):
  history = trax_history.History()
  self._append_metrics(history, ("train", "loss"), [3.0, 1.07])
  self._append_metrics(history, ("eval", "accuracy"), [0.12, 0.68])
  observations = online_tune.history_to_observations(
      history,
      metrics=(("eval", "accuracy"), ("train", "loss")),
      observation_range=(0, 5),
      include_lr=False,
  )
  np.testing.assert_array_equal(observations, [[0.12, 3.0], [0.68, 1.07]])
def test_converts_history_to_observations_without_controls(self):
  history = trax_history.History()
  self._append_metrics(history, ("train", "loss"), [1.0, 0.07])
  self._append_metrics(history, ("eval", "accuracy"), [0.12, 0.68])
  observations = online_tune.history_to_observations(
      history,
      metrics=(("eval", "accuracy"), ("train", "loss")),
      observation_range=(-1, 1),
      control_configs=None,
  )
  np.testing.assert_array_almost_equal(
      observations, [[0.12, 1.0], [0.68, 0.07]])
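# The tests in this file populate a History through a small helper; a
# plausible implementation, assuming trax_history.History.append takes
# (mode, metric, step, value):
def _append_metrics(self, history, metric, values):
  (mode, metric_name) = metric
  for (step, value) in enumerate(values):
    history.append(mode, metric_name, step, value)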
def test_converts_history_to_observations_with_learning_rate(self):
  history = trax_history.History()
  self._append_metrics(
      history, ("train", "training/learning_rate"), [1e-3, 1e-4])
  observations = online_tune.history_to_observations(
      history,
      metrics=(),
      observation_range=(0, 5),
      include_lr=True,
  )
  self.assertEqual(observations.shape, (2, 1))
  ((log_lr_1,), (log_lr_2,)) = observations
  self.assertGreater(log_lr_1, log_lr_2)
def test_converts_history_to_observations_with_controls(self):
  history = trax_history.History()
  self._append_metrics(
      history, ("train", "training/learning_rate"), [1e-3, 1e-4])
  observations = online_tune.history_to_observations(
      history,
      metrics=(),
      observation_range=(0, 5),
      control_configs=(("learning_rate", None, (1e-9, 10.0), False),),
  )
  self.assertEqual(observations.shape, (2, 1))
  ((log_lr_1,), (log_lr_2,)) = observations
  self.assertGreater(log_lr_1, log_lr_2)
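# Hypothetical sketch of how a control value could be embedded in the
# observation on a log scale, matching the control_configs tuple layout
# (name, start, (low, high), flip) used above. The tests only check ordering,
# so this is an assumption about the representation, not the exact formula.
import numpy as np

def _sketch_control_observation(value, control_config):
  (_, _, (low, high), flip) = control_config
  log_value = np.log(np.clip(value, low, high))
  # A flipped control (e.g. a "keep rate" style knob) would negate the axis.
  return -log_value if flip else log_value

# Consistent with assertGreater(log_lr_1, log_lr_2) for LRs 1e-3 > 1e-4:
lr_config = ("learning_rate", None, (1e-9, 10.0), False)
assert (_sketch_control_observation(1e-3, lr_config) >
        _sketch_control_observation(1e-4, lr_config))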
def PolicySchedule(
    history,
    observation_metrics=(
        ("train", "metrics/accuracy"),
        ("train", "metrics/loss"),
        ("eval", "metrics/accuracy"),
        ("eval", "metrics/loss"),
    ),
    include_lr_in_observation=False,
    observation_range=(0.0, 5.0),
    start_lr=0.001,
    max_lr=10.0,
    action_multipliers=(1.0 / 1.5, 1.0 / 1.25, 1.0, 1.25, 1.5),
    policy_and_value_model=trax_models.FrameStackMLP,
    policy_and_value_two_towers=False,
    policy_dir=gin.REQUIRED,
):
  """Learning rate schedule controlled by a learned policy.

  Args:
    history: the history of training and evaluation (History object).
    observation_metrics: list of pairs (mode, metric), as in the History
      object.
    include_lr_in_observation: bool, whether to include the learning rate in
      observations.
    observation_range: tuple (low, high), range to clip the observation to.
    start_lr: starting learning rate.
    max_lr: maximum value to clip the learning rate to.
    action_multipliers: sequence of LR multipliers that policy actions
      correspond to.
    policy_and_value_model: Trax model to use as the policy.
    policy_and_value_two_towers: bool, whether the action distribution and
      value prediction is computed by separate model towers.
    policy_dir: directory with the policy checkpoint.

  Returns:
    a function learning_rate(step): float -> float, the step-dependent lr.
  """
  # Turn the history into observations for the policy. If we don't have any,
  # return the initial learning rate.
  start_time = time.time()
  observations = online_tune.history_to_observations(
      history, observation_metrics, observation_range,
      include_lr_in_observation)
  logging.vlog(
      1, "Building observations took %0.2f sec.", time.time() - start_time)
  if observations.shape[0] == 0:
    return lambda _: start_lr

  # Build the policy network and load its parameters.
  start_time = time.time()
  net = ppo.policy_and_value_net(
      n_actions=len(action_multipliers),
      bottom_layers_fn=policy_and_value_model,
      two_towers=policy_and_value_two_towers,
  )
  logging.vlog(
      1, "Building the policy network took %0.2f sec.",
      time.time() - start_time)
  start_time = time.time()
  # (opt_state, state, epoch, opt_step)
  (opt_state, state, _, _) = ppo.maybe_restore_opt_state(policy_dir)
  assert opt_state is not None, "Policy checkpoint not found."
  (params, _) = opt_state
  logging.vlog(
      1, "Restoring the policy parameters took %0.2f sec.",
      time.time() - start_time)

  # Run the policy and sample an action.
  seed = random.randint(0, 2**31 - 1)
  rng = jax_random.get_prng(seed=seed)
  start_time = time.time()
  # ((log_probs, value_preds), state). We have no way to pass state to the
  # next step, but that should be fine.
  ((log_probs, _), _) = net(np.array([observations]), params, state, rng=rng)
  logging.vlog(
      1, "Running the policy took %0.2f sec.", time.time() - start_time)

  # Sample from the action distribution for the last timestep.
  action = utils.gumbel_sample(log_probs[0, -1, :])

  # Get a new learning rate.
  new_lr = online_tune.new_learning_rate(
      action, history, action_multipliers, max_lr)
  return lambda _: new_lr
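# For reference, a self-contained sketch of the Gumbel-max trick that a helper
# like utils.gumbel_sample presumably implements: adding Gumbel noise to
# log-probabilities and taking the argmax samples an index from the
# corresponding softmax distribution.
import numpy as np

def _sketch_gumbel_sample(log_probs, axis=-1):
  u = np.random.uniform(low=1e-9, high=1.0 - 1e-9, size=np.shape(log_probs))
  gumbel_noise = -np.log(-np.log(u))
  return np.argmax(log_probs + gumbel_noise, axis=axis)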
def PolicySchedule(
    history,
    observation_metrics=(
        ("train", "metrics/accuracy"),
        ("train", "metrics/loss"),
        ("eval", "metrics/accuracy"),
        ("eval", "metrics/loss"),
    ),
    include_controls_in_observation=False,
    control_configs=(
        # (name, start, (low, high), flip)
        ("learning_rate", 1e-3, (1e-9, 10.0), False),
    ),
    observation_range=(0.0, 10.0),
    action_multipliers=(1.0 / 1.5, 1.0 / 1.25, 1.0, 1.25, 1.5),
    policy_and_value_model=trax_models.FrameStackMLP,
    policy_and_value_two_towers=False,
    policy_and_value_vocab_size=None,
    policy_dir=gin.REQUIRED,
    temperature=1.0,
):
  """Learning rate schedule controlled by a learned policy.

  Args:
    history: the history of training and evaluation (History object).
    observation_metrics: list of pairs (mode, metric), as in the History
      object.
    include_controls_in_observation: bool, whether to include the controls in
      observations.
    control_configs: control configs, see trax.rl.envs.OnlineTuneEnv.
    observation_range: tuple (low, high), range to clip the metrics to.
    action_multipliers: sequence of LR multipliers that policy actions
      correspond to.
    policy_and_value_model: Trax model to use as the policy.
    policy_and_value_two_towers: bool, whether the action distribution and
      value prediction is computed by separate model towers.
    policy_and_value_vocab_size: vocabulary size of a policy and value network
      operating on serialized representation. If None, use raw continuous
      representation.
    policy_dir: directory with the policy checkpoint.
    temperature: temperature for sampling from the policy.

  Returns:
    a function nontrainable_params(step): float -> {"name": float}, the
    step-dependent schedule for nontrainable parameters.
  """
  # Turn the history into observations for the policy. If we don't have any,
  # return the initial controls.
  start_time = time.time()
  observations = online_tune.history_to_observations(
      history, observation_metrics, observation_range,
      control_configs if include_controls_in_observation else None)
  logging.vlog(
      1, "Building observations took %0.2f sec.", time.time() - start_time)
  if observations.shape[0] == 0:
    controls = {
        name: start_value
        for (name, start_value, _, _) in control_configs
    }
    return lambda _: controls

  assert policy_and_value_vocab_size is None, (
      "Serialized policies are not supported yet.")
  # Build the policy network and load its parameters.
  start_time = time.time()
  net = ppo.policy_and_value_net(
      n_controls=len(control_configs),
      n_actions=len(action_multipliers),
      vocab_size=policy_and_value_vocab_size,
      bottom_layers_fn=policy_and_value_model,
      two_towers=policy_and_value_two_towers,
  )
  logging.vlog(
      1, "Building the policy network took %0.2f sec.",
      time.time() - start_time)
  start_time = time.time()
  # (opt_state, state, epoch, opt_step)
  (opt_state, state, _, _) = ppo.maybe_restore_opt_state(policy_dir)
  assert opt_state is not None, "Policy checkpoint not found."
  (params, _) = opt_state
  logging.vlog(
      1, "Restoring the policy parameters took %0.2f sec.",
      time.time() - start_time)

  # Run the policy and sample an action.
  seed = random.randint(0, 2**31 - 1)
  rng = jax_random.get_prng(seed=seed)
  start_time = time.time()
  # ((log_probs, value_preds), state). We have no way to pass state to the
  # next step, but that should be fine.
  (log_probs, _) = (
      net(np.array([observations]), params=params, state=state, rng=rng))
  logging.vlog(
      1, "Running the policy took %0.2f sec.", time.time() - start_time)

  # Sample from the action distribution for the last timestep.
  assert log_probs.shape == (
      1, len(control_configs) * observations.shape[0], len(action_multipliers))
  action = utils.gumbel_sample(
      log_probs[0, -len(control_configs):, :] / temperature)

  # Get new controls.
  controls = {
      # name: value
      control_config[0]: online_tune.update_control(  # pylint: disable=g-complex-comprehension
          control_config, control_action, history, action_multipliers)
      for (control_action, control_config) in zip(action, control_configs)
  }
  return lambda _: controls
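# Hypothetical usage sketch: policy_dir is gin.REQUIRED, so in practice the
# schedule is bound via a gin config; called directly it returns a constant
# function of the step. The checkpoint path below is made up.
#
#   schedule = PolicySchedule(history, policy_dir="/tmp/policy_checkpoints")
#   controls = schedule(step)  # e.g. {"learning_rate": 1.25e-3}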