def evaluator(
    self,
    variable_source: acme.VariableSource,
    counter: counting.Counter,
):
  """The evaluation process."""

  # Build environment, model, network.
  environment = self._environment_factory()
  network = self._network_factory(self._env_spec.actions)
  model = self._model_factory(self._env_spec)

  # Create variable client for communicating with the learner.
  tf2_utils.create_variables(network, [self._env_spec.observations])
  variable_client = tf2_variable_utils.VariableClient(
      client=variable_source,
      variables={'policy': network.trainable_variables},
      update_period=self._variable_update_period)

  # Create the agent.
  actor = acting.MCTSActor(
      environment_spec=self._env_spec,
      model=model,
      network=network,
      discount=self._discount,
      variable_client=variable_client,
      num_simulations=self._num_simulations,
  )

  # Create the run loop and return it.
  logger = loggers.make_default_logger('evaluator')
  return acme.EnvironmentLoop(
      environment, actor, counter=counter, logger=logger)

def evaluator(
    self,
    variable_source: acme.VariableSource,
    counter: counting.Counter,
):
  """The evaluation process."""
  environment = self._environment_factory(True)
  network = self._network_factory(self._environment_spec.actions)
  tf2_utils.create_variables(network, [self._obs_spec])

  # Act greedily with respect to the Q-values at evaluation time.
  policy_network = snt.DeepRNN([
      network,
      lambda qs: tf.cast(tf.argmax(qs, axis=-1), tf.int32),
  ])

  variable_client = tf2_variable_utils.VariableClient(
      client=variable_source,
      variables={'policy': policy_network.variables},
      update_period=self._variable_update_period)

  # Make sure not to use a random policy after checkpoint restoration by
  # assigning variables before running the environment loop.
  variable_client.update_and_wait()

  # Create the agent.
  actor = actors.RecurrentActor(
      policy_network=policy_network, variable_client=variable_client)

  # Create the run loop and return it.
  logger = loggers.make_default_logger(
      'evaluator', save_data=True, steps_key='evaluator_steps')
  counter = counting.Counter(counter, 'evaluator')
  return acme.EnvironmentLoop(environment, actor, counter, logger)

def test_update(self):
  # Create two instances of the same model.
  actor_model = snt.nets.MLP([50, 30])
  learner_model = snt.nets.MLP([50, 30])

  # Create variables first.
  input_spec = tf.TensorSpec(shape=(28,), dtype=tf.float32)
  tf2_utils.create_variables(actor_model, [input_spec])
  tf2_utils.create_variables(learner_model, [input_spec])

  # Register them as client and source variables, respectively.
  actor_variables = actor_model.variables
  np_learner_variables = [
      tf2_utils.to_numpy(v) for v in learner_model.variables
  ]
  variable_source = fakes.VariableSource(np_learner_variables)
  variable_client = tf2_variable_utils.VariableClient(
      variable_source, {'policy': actor_variables})

  # Now, given some random batch of test input:
  x = tf.random.normal(shape=(8, 28))

  # Before copying variables, the models have different outputs.
  actor_output = actor_model(x).numpy()
  learner_output = learner_model(x).numpy()
  self.assertFalse(np.allclose(actor_output, learner_output))

  # Update the variable client.
  variable_client.update_and_wait()

  # After copying variables (by updating the client), the models are the same.
  actor_output = actor_model(x).numpy()
  learner_output = learner_model(x).numpy()
  self.assertTrue(np.allclose(actor_output, learner_output))

def evaluator(self, variable_source: acme.VariableSource,
              counter: counting.Counter):
  """The evaluation process."""
  environment = self._environment_factory(True)
  network = self._network_factory(self._environment_spec.actions)

  tf2_utils.create_variables(network, [self._environment_spec.observations])
  variable_client = tf2_variable_utils.VariableClient(
      client=variable_source,
      variables={'policy': network.variables},
      update_period=self._variable_update_period)

  # Make sure not to use a random policy after checkpoint restoration by
  # assigning variables before running the environment loop.
  variable_client.update_and_wait()

  # Create the agent.
  actor = acting.IMPALAActor(
      network=network, variable_client=variable_client)

  # Create the run loop and return it.
  logger = loggers.make_default_logger(
      'evaluator', steps_key='evaluator_steps')
  counter = counting.Counter(counter, 'evaluator')
  return acme.EnvironmentLoop(environment, actor, counter, logger)

def make_actor(
    self,
    policy_network: snt.Module,
    adder: Optional[adders.Adder] = None,
    variable_source: Optional[core.VariableSource] = None,
    deterministic_policy: Optional[bool] = False,
):
  """Create an actor instance."""
  if variable_source:
    # Create the variable client responsible for keeping the actor
    # up-to-date.
    variable_client = variable_utils.VariableClient(
        client=variable_source,
        variables={'policy': policy_network.variables},
        update_period=1000,
    )

    # Make sure not to use a random policy after checkpoint restoration by
    # assigning variables before running the environment loop.
    variable_client.update_and_wait()
  else:
    variable_client = None

  # Create the actor which defines how we take actions.
  return acting.SVG0Actor(
      policy_network=policy_network,
      adder=adder,
      variable_client=variable_client,
      deterministic_policy=deterministic_policy)

def actor(
    self,
    replay: reverb.Client,
    variable_source: acme.VariableSource,
    counter: counting.Counter,
) -> acme.EnvironmentLoop:
  """The actor process."""
  action_spec = self._environment_spec.actions
  observation_spec = self._environment_spec.observations

  # Create environment and target networks to act with.
  environment = self._environment_factory(False)
  agent_networks = self._network_factory(action_spec, self._num_critic_heads)

  # Make sure observation network is defined.
  observation_network = agent_networks.get('observation', tf.identity)

  # Create a stochastic behavior policy.
  behavior_network = snt.Sequential([
      observation_network,
      agent_networks['policy'],
      networks.StochasticSamplingHead(),
  ])

  # Ensure network variables are created.
  tf2_utils.create_variables(behavior_network, [observation_spec])
  policy_variables = {'policy': behavior_network.variables}

  # Create the variable client responsible for keeping the actor up-to-date.
  variable_client = tf2_variable_utils.VariableClient(
      variable_source, policy_variables, update_period=1000)

  # Make sure not to use a random policy after checkpoint restoration by
  # assigning variables before running the environment loop.
  variable_client.update_and_wait()

  # Component to add things into replay.
  adder = adders.NStepTransitionAdder(
      client=replay,
      n_step=self._n_step,
      max_in_flight_items=self._max_in_flight_items,
      discount=self._additional_discount)

  # Create the agent.
  actor = actors.FeedForwardActor(
      policy_network=behavior_network,
      adder=adder,
      variable_client=variable_client)

  # Create logger and counter; actors will not spam bigtable.
  counter = counting.Counter(counter, 'actor')
  logger = loggers.make_default_logger(
      'actor',
      save_data=False,
      time_delta=self._log_every,
      steps_key='actor_steps')

  # Create the run loop and return it.
  return acme.EnvironmentLoop(environment, actor, counter, logger)

def evaluator(
    self,
    variable_source: acme.VariableSource,
    counter: counting.Counter,
):
  """The evaluation process."""
  action_spec = self._environment_spec.actions
  observation_spec = self._environment_spec.observations

  # Create environment and target networks to act with.
  environment = self._environment_factory(True)
  agent_networks = self._network_factory(action_spec)

  # Make sure observation network is defined.
  observation_network = agent_networks.get('observation', tf.identity)

  # Create a deterministic evaluation policy by taking the mean of the
  # stochastic policy.
  evaluator_network = snt.Sequential([
      observation_network,
      agent_networks['policy'],
      networks.StochasticMeanHead(),
  ])

  # Ensure network variables are created.
  tf2_utils.create_variables(evaluator_network, [observation_spec])
  policy_variables = {'policy': evaluator_network.variables}

  # Create the variable client responsible for keeping the actor up-to-date.
  variable_client = tf2_variable_utils.VariableClient(
      variable_source,
      policy_variables,
      update_period=self._variable_update_period)

  # Make sure not to evaluate a random actor by assigning variables before
  # running the environment loop.
  variable_client.update_and_wait()

  # Create the agent.
  evaluator = actors.FeedForwardActor(
      policy_network=evaluator_network, variable_client=variable_client)

  # Create logger and counter.
  counter = counting.Counter(counter, 'evaluator')
  logger = loggers.make_default_logger(
      'evaluator', time_delta=self._log_every, steps_key='evaluator_steps')
  observers = self._make_observers() if self._make_observers else ()

  # Create the run loop and return it.
  return acme.EnvironmentLoop(
      environment, evaluator, counter, logger, observers=observers)

def actor(
    self,
    replay: reverb.Client,
    variable_source: acme.VariableSource,
    counter: counting.Counter,
):
  """The actor process."""
  action_spec = self._environment_spec.actions
  observation_spec = self._environment_spec.observations

  # Create environment and behavior networks.
  environment = self._environment_factory(False)
  agent_networks = self._network_factory(action_spec)

  # Create behavior network by adding some random dithering.
  behavior_network = snt.Sequential([
      agent_networks.get('observation', tf.identity),
      agent_networks.get('policy'),
      networks.ClippedGaussian(self._sigma),
  ])

  # Ensure network variables are created.
  tf2_utils.create_variables(behavior_network, [observation_spec])
  variables = {'policy': behavior_network.variables}

  # Create the variable client responsible for keeping the actor up-to-date.
  variable_client = tf2_variable_utils.VariableClient(
      variable_source, variables, update_period=self._variable_update_period)

  # Make sure not to use a random policy after checkpoint restoration by
  # assigning variables before running the environment loop.
  variable_client.update_and_wait()

  # Component to add things into replay.
  adder = adders.NStepTransitionAdder(
      client=replay, n_step=self._n_step, discount=self._discount)

  # Create the agent.
  actor = actors.FeedForwardActor(
      behavior_network, adder=adder, variable_client=variable_client)

  # Create logger and counter; actors will not spam bigtable.
  counter = counting.Counter(counter, 'actor')
  logger = loggers.make_default_logger(
      'actor',
      save_data=False,
      time_delta=self._log_every,
      steps_key='actor_steps')

  # Create the loop to connect environment and agent.
  return acme.EnvironmentLoop(environment, actor, counter, logger)

def make_executor(
    self,
    policy_networks: Dict[str, snt.Module],
    adder: Optional[adders.ParallelAdder] = None,
    variable_source: Optional[core.VariableSource] = None,
) -> core.Executor:
    """Create an executor instance.

    Args:
        policy_networks (Dict[str, snt.Module]): policy networks for each
            agent in the system.
        adder (Optional[adders.ParallelAdder], optional): adder to send data
            to a replay buffer. Defaults to None.
        variable_source (Optional[core.VariableSource], optional): variables
            server. Defaults to None.

    Returns:
        core.Executor: system executor, a collection of agents making up the
            part of the system that generates data by interacting with the
            environment.
    """
    shared_weights = self._config.shared_weights

    variable_client = None
    if variable_source:
        agent_keys = self._agent_types if shared_weights else self._agents

        # Create policy variables.
        variables = {}
        for agent in agent_keys:
            variables[agent] = policy_networks[agent].variables

        # Get new policy variables.
        variable_client = variable_utils.VariableClient(
            client=variable_source,
            variables={"policy": variables},
            update_period=self._config.executor_variable_update_period,
        )

        # Make sure not to use a random policy after checkpoint restoration by
        # assigning variables before running the environment loop.
        variable_client.update_and_wait()

    # Create the actor which defines how we take actions.
    return self._executor_fn(
        policy_networks=policy_networks,
        agent_specs=self._config.environment_spec.get_agent_specs(),
        shared_weights=shared_weights,
        variable_client=variable_client,
        adder=adder,
    )

def actor(
    self,
    replay: reverb.Client,
    variable_source: acme.VariableSource,
    counter: counting.Counter,
    epsilon: float,
) -> acme.EnvironmentLoop:
  """The actor process."""
  environment = self._environment_factory(False)
  network = self._network_factory(self._environment_spec.actions)
  tf2_utils.create_variables(network, [self._obs_spec])

  # Epsilon-greedy behavior policy on top of the recurrent Q-network.
  policy_network = snt.DeepRNN([
      network,
      lambda qs: tf.cast(trfl.epsilon_greedy(qs, epsilon).sample(), tf.int32),
  ])

  # Component to add things into replay.
  sequence_length = self._burn_in_length + self._trace_length + 1
  adder = adders.SequenceAdder(
      client=replay,
      period=self._replay_period,
      sequence_length=sequence_length,
      delta_encoded=True,
  )

  variable_client = tf2_variable_utils.VariableClient(
      client=variable_source,
      variables={'policy': policy_network.variables},
      update_period=self._variable_update_period)

  # Make sure not to use a random policy after checkpoint restoration by
  # assigning variables before running the environment loop.
  variable_client.update_and_wait()

  # Create the agent.
  actor = actors.RecurrentActor(
      policy_network=policy_network,
      variable_client=variable_client,
      adder=adder)

  counter = counting.Counter(counter, 'actor')
  logger = loggers.make_default_logger(
      'actor', save_data=False, steps_key='actor_steps')

  # Create the loop to connect environment and agent.
  return acme.EnvironmentLoop(environment, actor, counter, logger)

def evaluator(
    self,
    variable_source: acme.VariableSource,
    counter: counting.Counter,
):
  """The evaluation process."""
  action_spec = self._environment_spec.actions
  observation_spec = self._environment_spec.observations

  # Create environment and evaluator networks.
  environment = self._environment_factory(True)
  agent_networks = self._network_factory(action_spec)

  # Create evaluator network.
  evaluator_network = snt.Sequential([
      agent_networks.get('observation', tf.identity),
      agent_networks.get('policy'),
  ])

  # Ensure network variables are created.
  tf2_utils.create_variables(evaluator_network, [observation_spec])
  variables = {'policy': evaluator_network.variables}

  # Create the variable client responsible for keeping the actor up-to-date.
  variable_client = tf2_variable_utils.VariableClient(
      variable_source, variables, update_period=self._variable_update_period)

  # Make sure not to evaluate a random actor by assigning variables before
  # running the environment loop.
  variable_client.update_and_wait()

  # Create the evaluator; note it will not add experience to replay.
  evaluator = actors.FeedForwardActor(
      evaluator_network, variable_client=variable_client)

  # Create logger and counter.
  counter = counting.Counter(counter, 'evaluator')
  logger = loggers.make_default_logger(
      'evaluator', time_delta=self._log_every, steps_key='evaluator_steps')

  # Create the run loop and return it.
  return acme.EnvironmentLoop(environment, evaluator, counter, logger)

def actor(
    self,
    replay: reverb.Client,
    variable_source: acme.VariableSource,
    counter: counting.Counter,
    epsilon: float,
) -> acme.EnvironmentLoop:
  """The actor process."""
  environment = self._environment_factory(False)
  network = self._network_factory(self._env_spec.actions)

  # Just inline the policy network here.
  policy_network = snt.Sequential([
      network,
      lambda q: trfl.epsilon_greedy(q, epsilon=epsilon).sample(),
  ])

  tf2_utils.create_variables(policy_network, [self._env_spec.observations])
  variable_client = tf2_variable_utils.VariableClient(
      client=variable_source,
      variables={'policy': policy_network.trainable_variables},
      update_period=self._variable_update_period)

  # Make sure not to use a random policy after checkpoint restoration by
  # assigning variables before running the environment loop.
  variable_client.update_and_wait()

  # Component to add things into replay.
  adder = adders.NStepTransitionAdder(
      client=replay,
      n_step=self._n_step,
      discount=self._discount,
  )

  # Create the agent.
  actor = actors.FeedForwardActor(policy_network, adder, variable_client)

  # Create the loop to connect environment and agent.
  counter = counting.Counter(counter, 'actor')
  logger = loggers.make_default_logger(
      'actor', save_data=False, steps_key='actor_steps')
  return acme.EnvironmentLoop(environment, actor, counter, logger)

def test_update_and_wait(self):
  # Create a variable source (emulating the learner).
  np_learner_variables = tf2_utils.to_numpy(self._learner_model.variables)
  variable_source = fakes.VariableSource(np_learner_variables)

  # Create a variable client (emulating the actor).
  variable_client = tf2_variable_utils.VariableClient(
      variable_source, {'policy': self._actor_model.variables})

  # Create some random batch of test input:
  x = tf.random.normal(shape=(_BATCH_SIZE, _INPUT_SIZE))

  # Before copying variables, the models have different outputs.
  self.assertNotAllClose(self._actor_model(x), self._learner_model(x))

  # Update the variable client.
  variable_client.update_and_wait()

  # After copying variables (by updating the client), the models are the same.
  self.assertAllClose(self._actor_model(x), self._learner_model(x))

def actor(
    self,
    replay: reverb.Client,
    variable_source: acme.VariableSource,
    counter: counting.Counter,
) -> acme.EnvironmentLoop:
  """The actor process."""

  # Build environment, model, network.
  environment = self._environment_factory()
  network = self._network_factory(self._env_spec.actions)
  model = self._model_factory(self._env_spec)

  # Create variable client for communicating with the learner.
  tf2_utils.create_variables(network, [self._env_spec.observations])
  variable_client = tf2_variable_utils.VariableClient(
      client=variable_source,
      variables={'network': network.trainable_variables},
      update_period=self._variable_update_period)

  # Component to add things into replay.
  adder = adders.NStepTransitionAdder(
      client=replay,
      n_step=self._n_step,
      discount=self._discount,
  )

  # Create the agent.
  actor = acting.MCTSActor(
      environment_spec=self._env_spec,
      model=model,
      network=network,
      discount=self._discount,
      adder=adder,
      variable_client=variable_client,
      num_simulations=self._num_simulations,
  )

  # Create the loop to connect environment and agent.
  return acme.EnvironmentLoop(environment, actor, counter)

def evaluator(
    self,
    variable_source: acme.VariableSource,
    counter: counting.Counter,
):
  """The evaluation process."""

  # Create environment and target networks to act with.
  environment = self._environment_factory(True)
  agent_networks = self._network_factory(self._environment_spec)

  # Create a deterministic evaluation policy by taking the mean of the
  # stochastic policy.
  evaluator_network = snt.Sequential([
      agent_networks['observation'],
      agent_networks['policy'],
      networks.StochasticMeanHead(),
  ])

  # Create the variable client responsible for keeping the actor up-to-date.
  variable_client = tf2_variable_utils.VariableClient(
      variable_source,
      variables={'policy': evaluator_network.variables},
      update_period=1000)

  # Make sure not to evaluate a random actor by assigning variables before
  # running the environment loop.
  variable_client.update_and_wait()

  # Create the agent.
  evaluator = actors.FeedForwardActor(
      policy_network=evaluator_network, variable_client=variable_client)

  # Create logger and counter.
  counter = counting.Counter(counter, 'evaluator')
  logger = loggers.make_default_logger('evaluator', time_delta=self._log_every)

  # Create the run loop and return it.
  return acme.EnvironmentLoop(environment, evaluator, counter, logger)

def evaluator(
    self,
    variable_source: acme.VariableSource,
    counter: counting.Counter,
):
  """The evaluation process."""
  environment = self._environment_factory(True)
  network = self._network_factory(self._env_spec.actions)

  # Just inline the policy network here.
  policy_network = snt.Sequential([
      network,
      lambda q: trfl.epsilon_greedy(q, self._evaluator_epsilon).sample(),
  ])

  tf2_utils.create_variables(policy_network, [self._env_spec.observations])
  variable_client = tf2_variable_utils.VariableClient(
      client=variable_source,
      variables={'policy': policy_network.trainable_variables},
      update_period=self._variable_update_period)

  # Make sure not to use a random policy after checkpoint restoration by
  # assigning variables before running the environment loop.
  variable_client.update_and_wait()

  # Create the agent.
  actor = actors.FeedForwardActor(
      policy_network, variable_client=variable_client)

  # Create the run loop and return it.
  logger = loggers.make_default_logger(
      'evaluator', steps_key='evaluator_steps')
  counter = counting.Counter(counter, 'evaluator')
  return acme.EnvironmentLoop(
      environment, actor, counter=counter, logger=logger)

def actor(
    self,
    replay: reverb.Client,
    variable_source: acme.VariableSource,
    counter: counting.Counter,
) -> acme.EnvironmentLoop:
  """The actor process."""
  environment = self._environment_factory(False)
  network = self._network_factory(self._environment_spec.actions)
  tf2_utils.create_variables(network, [self._environment_spec.observations])

  # Component to add things into the queue.
  adder = adders.SequenceAdder(
      client=replay,
      period=self._sequence_period,
      sequence_length=self._sequence_length)

  variable_client = tf2_variable_utils.VariableClient(
      client=variable_source,
      variables={'policy': network.variables},
      update_period=self._variable_update_period)

  # Make sure not to use a random policy after checkpoint restoration by
  # assigning variables before running the environment loop.
  variable_client.update_and_wait()

  # Create the agent.
  actor = acting.IMPALAActor(
      network=network, variable_client=variable_client, adder=adder)

  counter = counting.Counter(counter, 'actor')
  logger = loggers.make_default_logger(
      'actor', save_data=False, steps_key='actor_steps')

  # Create the loop to connect environment and agent.
  return acme.EnvironmentLoop(environment, actor, counter, logger)

def make_executor(
    self,
    q_networks: Dict[str, snt.Module],
    action_selectors: Dict[str, Any],
    communication_module: BaseCommunicationModule,
    adder: Optional[adders.ParallelAdder] = None,
    variable_source: Optional[core.VariableSource] = None,
    trainer: Optional[training.MADQNRecurrentCommTrainer] = None,
    evaluator: bool = False,
) -> core.Executor:
    """Create an executor instance.

    Args:
        q_networks (Dict[str, snt.Module]): q-value networks for each agent in
            the system.
        action_selectors (Dict[str, Any]): policy action selector method, e.g.
            epsilon greedy.
        communication_module (BaseCommunicationModule): module for enabling
            communication protocols between agents.
        adder (Optional[adders.ParallelAdder], optional): adder to send data
            to a replay buffer. Defaults to None.
        variable_source (Optional[core.VariableSource], optional): variables
            server. Defaults to None.
        trainer (Optional[training.MADQNRecurrentCommTrainer], optional):
            system trainer. Defaults to None.
        evaluator (bool, optional): boolean indicator if the executor is used
            for evaluation only. Defaults to False.

    Returns:
        core.Executor: system executor, a collection of agents making up the
            part of the system that generates data by interacting with the
            environment.
    """
    shared_weights = self._config.shared_weights

    variable_client = None
    if variable_source:
        agent_keys = self._agent_types if shared_weights else self._agents

        # Create policy variables.
        variables = {agent: q_networks[agent].variables for agent in agent_keys}

        # Get new policy variables.
        variable_client = variable_utils.VariableClient(
            client=variable_source,
            variables={"q_network": variables},
            update_period=self._config.executor_variable_update_period,
        )

        # Make sure not to use a random policy after checkpoint restoration by
        # assigning variables before running the environment loop.
        variable_client.update_and_wait()

    # Check if we should use fingerprints.
    fingerprint = self._replay_stabiliser_fn is not None

    # Create the executor which coordinates the actors.
    return self._executor_fn(
        q_networks=q_networks,
        action_selectors=action_selectors,
        shared_weights=shared_weights,
        variable_client=variable_client,
        adder=adder,
        trainer=trainer,
        communication_module=communication_module,
        evaluator=evaluator,
        fingerprint=fingerprint,
    )

def test_update(self):
  # Create a barrier to be shared between the test body and the variable
  # source. The barrier will block until, in this case, two threads call
  # wait(). Note that the (fake) variable source will call it within its
  # get_variables() call.
  barrier = threading.Barrier(2)

  # Create a variable source (emulating the learner).
  np_learner_variables = tf2_utils.to_numpy(self._learner_model.variables)
  variable_source = fakes.VariableSource(np_learner_variables, barrier)

  # Create a variable client (emulating the actor).
  variable_client = tf2_variable_utils.VariableClient(
      variable_source, {'policy': self._actor_model.variables},
      update_period=_UPDATE_PERIOD)

  # Create some random batch of test input:
  x = tf.random.normal(shape=(_BATCH_SIZE, _INPUT_SIZE))

  # Create variables by doing the computation once.
  learner_output = self._learner_model(x)
  actor_output = self._actor_model(x)
  del learner_output, actor_output

  for _ in range(_UPDATE_PERIOD):
    # Before the update period is reached, the models have different outputs.
    self.assertNotAllClose(self._actor_model.variables,
                           self._learner_model.variables)

    # Before the update period is reached, the variable client should not
    # make any requests for variables.
    self.assertIsNone(variable_client._future)

    variable_client.update()

  # Make sure the last call created a request for variables and reset the
  # internal call counter.
  self.assertIsNotNone(variable_client._future)
  self.assertEqual(variable_client._call_counter, 0)
  future = variable_client._future

  for _ in range(_UPDATE_PERIOD):
    # Before the barrier allows the variables to be released, the models have
    # different outputs.
    self.assertNotAllClose(self._actor_model.variables,
                           self._learner_model.variables)

    variable_client.update()

    # Make sure no new requests are made.
    self.assertEqual(variable_client._future, future)

  # Calling wait() on the barrier will now allow the variables to be copied
  # over from source to client.
  barrier.wait()

  # Update once more to ensure the variables are copied over.
  while variable_client._future is not None:
    variable_client.update()

  # After a number of update calls, the variables should be the same.
  self.assertAllClose(self._actor_model.variables,
                      self._learner_model.variables)