Example 1: the evaluator process of a distributed MCTS agent
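These snippets use DeepMind's Acme library (TF agents). They assume imports along the following lines; the exact module paths are an assumption on my part and can differ between Acme versions:

    import acme
    from acme import datasets
    from acme import specs
    from acme.adders import reverb as adders
    from acme.agents.tf.mcts import acting
    from acme.agents.tf.mcts import learning
    from acme.agents.tf.mcts import models
    from acme.tf import utils as tf2_utils
    from acme.tf import variable_utils as tf2_variable_utils
    from acme.utils import counting
    from acme.utils import loggers
    import numpy as np
    import reverb
    import sonnet as snt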
    def evaluator(
        self,
        variable_source: acme.VariableSource,
        counter: counting.Counter,
    ) -> acme.EnvironmentLoop:
        """The evaluation process."""

        # Build environment, model, network.
        environment = self._environment_factory()
        network = self._network_factory(self._env_spec.actions)
        model = self._model_factory(self._env_spec)

        # Create variable client for communicating with the learner.
        tf2_utils.create_variables(network, [self._env_spec.observations])
        variable_client = tf2_variable_utils.VariableClient(
            client=variable_source,
            variables={'policy': network.trainable_variables},
            update_period=self._variable_update_period)

        # Create the agent.
        actor = acting.MCTSActor(
            environment_spec=self._env_spec,
            model=model,
            network=network,
            discount=self._discount,
            variable_client=variable_client,
            num_simulations=self._num_simulations,
        )

        # Create the run loop and return it.
        logger = loggers.make_default_logger('evaluator')
        return acme.EnvironmentLoop(environment,
                                    actor,
                                    counter=counter,
                                    logger=logger)
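When this method is not wired into a launcher, the returned loop can be run directly. A minimal sketch, assuming `builder`, `learner` and `counter` objects that are not part of the snippet:

    # Hypothetical direct use: the learner doubles as the variable source.
    eval_loop = builder.evaluator(variable_source=learner, counter=counter)
    eval_loop.run(num_episodes=100)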
Example 2: the actor process of a distributed MCTS agent
    def actor(
        self,
        replay: reverb.Client,
        variable_source: acme.VariableSource,
        counter: counting.Counter,
    ) -> acme.EnvironmentLoop:
        """The actor process."""

        # Build environment, model, network.
        environment = self._environment_factory()
        network = self._network_factory(self._env_spec.actions)
        model = self._model_factory(self._env_spec)

        # Create variable client for communicating with the learner.
        tf2_utils.create_variables(network, [self._env_spec.observations])
        variable_client = tf2_variable_utils.VariableClient(
            client=variable_source,
            variables={'network': network.trainable_variables},
            update_period=self._variable_update_period)

        # Component to add things into replay.
        adder = adders.NStepTransitionAdder(
            client=replay,
            n_step=self._n_step,
            discount=self._discount,
        )

        # Create the agent.
        actor = acting.MCTSActor(
            environment_spec=self._env_spec,
            model=model,
            network=network,
            discount=self._discount,
            adder=adder,
            variable_client=variable_client,
            num_simulations=self._num_simulations,
        )

        # Create the loop to connect environment and agent.
        return acme.EnvironmentLoop(environment, actor, counter)
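In Acme's distributed agents, process definitions like `actor` and `evaluator` are typically assembled into a Launchpad program. The sketch below is illustrative rather than the source's actual builder: it assumes the surrounding class also defines `replay`, `counter` and `learner` methods (not shown in these snippets) and follows Launchpad's usual CourierNode/ReverbNode pattern.

    # Assumes: import launchpad as lp
    def build(self, name: str = 'mcts'):
        """Hypothetical builder tying replay, learner, actor and evaluator together."""
        program = lp.Program(name=name)

        with program.group('replay'):
            replay = program.add_node(lp.ReverbNode(self.replay))

        with program.group('counter'):
            counter = program.add_node(lp.CourierNode(self.counter))

        with program.group('learner'):
            learner = program.add_node(lp.CourierNode(self.learner, replay, counter))

        with program.group('evaluator'):
            program.add_node(lp.CourierNode(self.evaluator, learner, counter))

        with program.group('actor'):
            program.add_node(lp.CourierNode(self.actor, replay, learner, counter))

        return program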
Example 3: constructor of a single-process MCTS agent (replay, dataset, actor and learner)
  def __init__(
      self,
      network: snt.Module,
      model: models.Model,
      optimizer: snt.Optimizer,
      n_step: int,
      discount: float,
      replay_capacity: int,
      num_simulations: int,
      environment_spec: specs.EnvironmentSpec,
      batch_size: int,
  ):
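    """Initializes the replay server, dataset, actor and learner for the agent."""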

    # Create a replay server for storing transitions.
    replay_table = reverb.Table(
        name=adders.DEFAULT_PRIORITY_TABLE,
        sampler=reverb.selectors.Uniform(),
        remover=reverb.selectors.Fifo(),
        max_size=replay_capacity,
        rate_limiter=reverb.rate_limiters.MinSize(1))
    self._server = reverb.Server([replay_table], port=None)

    # The adder is used to insert observations into replay.
    address = f'localhost:{self._server.port}'
    adder = adders.NStepTransitionAdder(
        client=reverb.Client(address),
        n_step=n_step,
        discount=discount)

    # The dataset provides an interface to sample from replay.
    replay_client = reverb.TFClient(address)
    action_spec: specs.DiscreteArray = environment_spec.actions
    dataset = datasets.make_reverb_dataset(
        client=replay_client,
        environment_spec=environment_spec,
        extra_spec={
            'pi': specs.Array(
                shape=(action_spec.num_values,), dtype=np.float32)
        },
        transition_adder=True)

    dataset = dataset.batch(batch_size, drop_remainder=True)

    tf2_utils.create_variables(network, [environment_spec.observations])

    # Now create the agent components: actor & learner.
    actor = acting.MCTSActor(
        environment_spec=environment_spec,
        model=model,
        network=network,
        discount=discount,
        adder=adder,
        num_simulations=num_simulations,
    )

    learner = learning.AZLearner(
        network=network,
        optimizer=optimizer,
        dataset=dataset,
        discount=discount,
    )

    # The parent class combines these together into one 'agent'.
    super().__init__(
        actor=actor,
        learner=learner,
        min_observations=10,
        observations_per_step=1,
    )
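A hypothetical way to construct and train the agent defined by this constructor. The `MCTS` class name, the `environment`, `network` and `model` objects, and every hyperparameter value below are illustrative assumptions rather than values taken from the source:

    env_spec = specs.make_environment_spec(environment)
    agent = MCTS(
        network=network,
        model=model,
        optimizer=snt.optimizers.Adam(learning_rate=1e-3),
        n_step=5,
        discount=0.99,
        replay_capacity=10_000,
        num_simulations=50,
        environment_spec=env_spec,
        batch_size=16,
    )
    # The standard Acme environment loop drives acting, and the agent learns
    # from replay as observations accumulate.
    loop = acme.EnvironmentLoop(environment, agent)
    loop.run(num_episodes=500)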