def wrap(self, input_action):
        """
        Args:
            input_action (dict): nested tensor action produced by the neural
                                 net. Dictionary keys are those marked True
                                 in 'to_learn'.

        Returns:
            actions (dict): nested tensor action which includes all action
                            components expected by the GKP class.

        """
        # position within the script, which repeats with period 'period'
        i = self._env._elapsed_steps % self.period
        out_shape = nest_utils.get_outer_shape(input_action, self._action_spec)

        action = {}
        for a in self.to_learn.keys():
            C1 = self.use_mask and self.mask[a][i]==0
            C2 = not self.to_learn[a]
            if C1 or C2: # if not learning: replicate scripted action
                action[a] = common.replicate(self.script[a][i], out_shape)
            else: # if learning: rescale input tensor
                action[a] = input_action[a]*self.scale[a]
                if self.learn_residuals:
                    action[a] += common.replicate(self.script[a][i], out_shape)
                
        return action
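For context, here is a minimal stand-alone sketch of the branching above for a single, hypothetical action component 'alpha' at a fixed script position; the names and values are illustrative only, and the real method operates on nested action dictionaries via common.replicate.

import tensorflow as tf

# Hypothetical scalar scripted value and network output for one component.
batch_size = 4
scripted_alpha = tf.constant(0.5)            # stands in for self.script['alpha'][i]
net_alpha = tf.random.normal([batch_size])   # stands in for input_action['alpha']
scale, to_learn, learn_residuals = 2.0, True, True

if not to_learn:
    # not learning: replicate the scripted value across the batch
    alpha = tf.fill([batch_size], scripted_alpha)
else:
    # learning: rescale the network output, optionally adding the scripted residual
    alpha = net_alpha * scale
    if learn_residuals:
        alpha += tf.fill([batch_size], scripted_alpha)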
Example #2
def test_trajectory_optimiser_pathological_trajectories(
        action_space, horizon, batch_size):
    """
    The replay buffer is a FIFO buffer of fixed capacity. Ensure that the capacity is sufficient
    such that the initial observation is still present in the buffer even in the pathological case
    where all trajectories are of length 2.
    """

    # construct the environment model
    observations = list(
        chain.from_iterable(
            repeat(
                [
                    replicate(tf.constant(StepType.FIRST), [batch_size]),
                    replicate(tf.constant(StepType.LAST), [batch_size]),
                ],
                horizon,
            )))

    transition_model = TrajectoryOptimiserTransitionModel(
        action_space, iter(observations))
    reward = ConstantReward(OBSERVATION_SPACE_SPEC, action_space, -1.0)
    termination_model = TrajectoryOptimiserTerminationModel(
        OBSERVATION_SPACE_SPEC)
    environment_model = EnvironmentModel(
        transition_model=transition_model,
        reward_model=reward,
        termination_model=termination_model,
        initial_state_distribution_model=DeterministicInitialStateModel(
            StepType.FIRST),
        batch_size=batch_size,
    )

    time_step_space = time_step_spec(OBSERVATION_SPACE_SPEC)
    policy = RandomTFPolicy(time_step_space, action_space)
    stub_policy_state_updater = StubPolicyStateUpdater()
    trajectory_optimiser = PolicyTrajectoryOptimiser(
        policy,
        horizon,
        population_size=batch_size,
        max_iterations=1,
        policy_state_updater=stub_policy_state_updater,
    )

    time_step = restart(tf.expand_dims(tf.constant(StepType.FIRST), axis=0),
                        batch_size=1)

    trajectory_optimiser.optimise(time_step, environment_model)

    stored_trajectory = stub_policy_state_updater.step_types[0]
    assert stored_trajectory[0][0] == StepType.FIRST
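To make the "pathological trajectories" concrete: the schedule built above is `horizon` back-to-back length-2 trajectories, each batched to `batch_size` rows. A small sketch with assumed concrete values (in TF-Agents, StepType.FIRST is 0 and StepType.LAST is 2):

from itertools import chain, repeat

horizon, batch_size = 3, 2  # hypothetical values for illustration
FIRST, LAST = 0, 2          # integer values of StepType.FIRST / StepType.LAST

step_types = list(chain.from_iterable(
    repeat([[FIRST] * batch_size, [LAST] * batch_size], horizon)))
print(step_types)
# [[0, 0], [2, 2], [0, 0], [2, 2], [0, 0], [2, 2]]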
Example #3
    def _distribution(self, time_step, policy_state):
        """Implementation of `distribution`. Returns a `Categorical` distribution.

        The returned `Categorical` distribution has (unnormalized) probabilities
        `exp(inverse_temperature * weights)`.

        Args:
          time_step: A `TimeStep` tuple corresponding to `time_step_spec()`.
          policy_state: Unused in `CategoricalPolicy`. It is simply passed through.

        Returns:
          A `PolicyStep` named tuple containing:
            `action`: An (optionally nested) `tfp.distributions.Distribution`
              capturing the distribution of next actions.
            `state`: A policy state tensor for the next call to `distribution`.
            `info`: Optional side information such as action log probabilities.
        """
        outer_shape = nest_utils.get_outer_shape(time_step,
                                                 self._time_step_spec)
        logits = (self._inverse_temperature *
                  common.replicate(self._weights, outer_shape))
        action_distribution = tfd.Independent(
            tfd.Categorical(logits=logits,
                            dtype=tf.nest.flatten(self.action_spec)[0].dtype))
        return policy_step.PolicyStep(action_distribution, policy_state)
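As a rough illustration of the docstring above, the following sketch builds the same kind of Categorical directly from hypothetical weights and an inverse temperature; logits of inverse_temperature * weights correspond to unnormalized probabilities exp(inverse_temperature * weights).

import tensorflow as tf
import tensorflow_probability as tfp

weights = tf.constant([0.1, 2.0, -1.0])   # hypothetical per-action weights
inverse_temperature = 0.5

dist = tfp.distributions.Categorical(logits=inverse_temperature * weights)
samples = dist.sample(5)          # five action indices drawn from {0, 1, 2}
probs = dist.probs_parameter()    # softmax(inverse_temperature * weights)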
Example #4
  def _action(self, time_step, policy_state, seed):
    del seed
    outer_shape = nest_utils.get_outer_shape(time_step,
                                             self._time_step_spec)
    action = tf.nest.map_structure(
        lambda t: common.replicate(t, outer_shape), self._action_value)
    return policy_step.PolicyStep(action, policy_state, self._policy_info)
Example #5
def test_generate_virtual_rollouts(observation_space, action_space, batch_size,
                                   horizon):
    observation = create_uniform_distribution_from_spec(
        observation_space).sample()
    network = DummyEnsembleTransitionNetwork(observation_space)
    model = KerasTransitionModel([network], observation_space, action_space)
    env_model = EnvironmentModel(
        transition_model=model,
        reward_model=ConstantReward(observation_space, action_space, -1.0),
        termination_model=ConstantFalseTermination(observation_space),
        initial_state_distribution_model=DeterministicInitialStateModel(
            observation),
        batch_size=batch_size,
    )
    random_policy = RandomTFPolicy(time_step_spec(observation_space),
                                   action_space)

    replay_buffer, driver, wrapped_env_model = virtual_rollouts_buffer_and_driver(
        env_model, random_policy, horizon)

    driver.run(wrapped_env_model.reset())
    trajectory = replay_buffer.gather_all()

    mid_steps = repeat(1, horizon - 1)
    expected_step_types = tf.constant(list(chain([0], mid_steps, [2])))
    batched_step_types = replicate(expected_step_types, (batch_size, ))
    np.testing.assert_array_equal(batched_step_types, trajectory.step_type)
Example #6
  def testReplicateScalarTensor(self):
    value = 1
    outer_shape = [2, 1]
    expected_replicated_value = np.array([[value], [value]])

    tf_value = tf.constant(value, shape=())
    replicated_value = self.evaluate(common.replicate(tf_value, outer_shape))
    self.assertAllEqual(expected_replicated_value, replicated_value)
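The test above pins down the contract of common.replicate: the result has shape outer_shape + value.shape, with value copied into every outer position. A minimal stand-in with the same assumed semantics, built on tf.broadcast_to:

import tensorflow as tf

def replicate_sketch(value, outer_shape):
    # Target shape is outer_shape followed by value's own shape; broadcasting
    # then copies `value` into each outer position.
    target = tf.concat(
        [tf.convert_to_tensor(outer_shape, tf.int32), tf.shape(value)], axis=0)
    return tf.broadcast_to(value, target)

replicate_sketch(tf.constant(1), [2, 1])      # shape [2, 1], all ones
replicate_sketch(tf.constant([[1., 2., 3.],
                              [4., 5., 6.]]), [2, 1])  # shape [2, 1, 2, 3]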
Example #7
  def _distribution(self, time_step, policy_state):
    outer_shape = nest_utils.get_outer_shape(time_step, self._time_step_spec)
    action = common.replicate(self._action_value, outer_shape)

    def dist_fn(action):
      """Return a categorical distribution with all density on fixed action."""
      return tfp.distributions.Deterministic(loc=action)
    return policy_step.PolicyStep(nest.map_structure(dist_fn, action),
                                  policy_state)
Example #8
  def _get_policy_info_and_action(self, time_step):
    outer_shape = nest_utils.get_outer_shape(time_step, self._time_step_spec)

    log_probability = tf.nest.map_structure(
        lambda _: tf.zeros(outer_shape, tf.float32), self._action_spec)
    policy_info = policy_step.set_log_probability(
        self._policy_info, log_probability=log_probability)
    action = tf.nest.map_structure(lambda t: common.replicate(t, outer_shape),
                                   self._action_value)
    return policy_info, action
Example #9
    def _action(self, time_step, policy_state, seed):
        outer_shape = nest_utils.get_outer_shape(time_step,
                                                 self._time_step_spec)
        action = common.replicate(self._next_action, outer_shape)

        self._action_index += 1
        self._action_index %= self._actions.shape[0]
        self._next_action.assign(self._actions[self._action_index])

        return policy_step.PolicyStep(action, policy_state, info=())
Example #10
    def _action(self, time_step, policy_state, seed):
        i = policy_state[0] % self.period # position within the policy period
        out_shape = nest_utils.get_outer_shape(time_step, self._time_step_spec)
        action = {}
        for a in self.script:
            A = common.replicate(self.script[a][i], out_shape)
            if a == 'alpha': # do Markovian feedback
                A *= time_step.observation['msmt'][:,-1,None]
                if policy_state[0] == 0: A *= 0
            action[a] = A

        return policy_step.PolicyStep(action, policy_state+1, self._policy_info)
Example #11
  def testReplicateTensor(self, outer_shape_type):
    value = np.array([[1., 2., 3.], [4., 5., 6.]])
    if outer_shape_type == 'tf_constant':
      outer_shape = tf.constant([2, 1])
    else:
      outer_shape = [2, 1]
    expected_replicated_value = np.array([[value], [value]])

    tf_value = tf.constant(value)
    replicated_value = self.evaluate(common.replicate(tf_value, outer_shape))
    self.assertAllEqual(expected_replicated_value, replicated_value)

    if isinstance(outer_shape, np.ndarray):
      # The shape should be fully defined in this case.
      self.assertEqual(tf.TensorShape(outer_shape + list(value.shape)),
                       replicated_value.shape)
Example #12
    def _action(self, time_step, policy_state, seed):
        i = policy_state[0] % self.period # position within the policy period
        out_shape = nest_utils.get_outer_shape(time_step, self._time_step_spec)
        action = {}
        for a in self.script.keys():
            A = common.replicate(self.script[a][i], out_shape)
            if a == 'alpha':
                m = time_step.observation['msmt']
                if i not in [2*self.K-1, 2*self.K]:
                    # feedback after trimming rounds is Markovian, and after
                    # intermediate sharpening rounds is simply zero.
                    A *= m[:,-1,:]
                    if policy_state[0] == 0: A *= 0
                else: # after K sharpening rounds do the Bayesian feedback
                    A = self.Bayesian_feedback(i, m)
            action[a] = A

        return policy_step.PolicyStep(action, policy_state+1, self._policy_info)
Example #13
  def testReplicateTensor(self, outer_shape_type):
    value = np.array([[1., 2., 3.], [4., 5., 6.]])
    if outer_shape_type == 'placeholder':
      outer_shape = tf.placeholder(tf.int32, shape=[2])
    elif outer_shape_type == 'tf_constant':
      outer_shape = tf.constant([2, 1])
    else:
      outer_shape = [2, 1]
    expected_replicated_value = np.array([[value], [value]])

    tf_value = tf.constant(value)
    tf_replicated_value = common.replicate(tf_value, outer_shape)
    if isinstance(outer_shape, np.ndarray):
      # The shape should be fully defined in this case.
      self.assertEqual(tf.TensorShape(outer_shape + list(value.shape)),
                       tf_replicated_value.shape)

    with self.test_session() as sess:
      feed_dict = {}
      if outer_shape_type == 'placeholder':
        feed_dict = {outer_shape: np.array([2, 1])}
      replicated_value = sess.run(tf_replicated_value, feed_dict)
      self.assertAllEqual(expected_replicated_value, replicated_value)
Example #14
  def _action(self, time_step, policy_state, seed):
    del seed
    outer_shape = nest_utils.get_outer_shape(time_step, self._time_step_spec)
    action = common.replicate(self._action_value, outer_shape)
    return policy_step.PolicyStep(action, policy_state)
Example #15
def test_trajectory_optimiser_each_iteration_starts_with_the_initial_observation(
        action_space, horizon, batch_size, max_iterations):
    class WrappedRandomTFPolicy(TFPolicy):
        def __init__(
            self,
            ts_spec: ts.TimeStep,
            action_spec: types.NestedTensorSpec,
            env_model: EnvironmentModel,
        ):
            super().__init__(ts_spec, action_spec)

            self._internal_policy = RandomTFPolicy(ts_spec, action_space)

            self._environment_model = env_model

        def _action(
            self,
            time_step: ts.TimeStep,
            policy_state: types.NestedTensor,
            seed: Optional[types.Seed],
        ) -> policy_step.PolicyStep:
            np.testing.assert_array_equal(
                time_step.observation,
                self._environment_model.current_time_step().observation)
            return self._internal_policy._action(time_step, policy_state, seed)

        def _distribution(
                self, time_step: ts.TimeStep,
                policy_state: types.NestedTensorSpec
        ) -> policy_step.PolicyStep:
            raise NotImplementedError()

    observations = list(
        repeat(replicate(tf.constant(StepType.MID), [batch_size]),
               max_iterations * (horizon + 1)))

    transition_model = TrajectoryOptimiserTransitionModel(
        action_space, iter(observations))
    reward = ConstantReward(OBSERVATION_SPACE_SPEC, action_space, -1.0)
    termination_model = TrajectoryOptimiserTerminationModel(
        OBSERVATION_SPACE_SPEC)
    environment_model = EnvironmentModel(
        transition_model=transition_model,
        reward_model=reward,
        termination_model=termination_model,
        initial_state_distribution_model=DeterministicInitialStateModel(
            StepType.FIRST),
        batch_size=batch_size,
    )

    time_step_space = time_step_spec(OBSERVATION_SPACE_SPEC)

    policy = WrappedRandomTFPolicy(time_step_space, action_space,
                                   environment_model)
    trajectory_optimiser = PolicyTrajectoryOptimiser(
        policy,
        horizon=horizon,
        population_size=batch_size,
        max_iterations=max_iterations,
    )

    initial_time_step = restart(tf.expand_dims(tf.constant(StepType.FIRST),
                                               axis=0),
                                batch_size=1)

    trajectory_optimiser.optimise(initial_time_step, environment_model)