Example #1
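This snippet appears to come from Acme's distributed MCTS/AlphaZero agent: it is the factory method that builds the learner process. It instantiates the policy/value network, creates the network variables, wires a dataset up to the Reverb replay server, and returns an AZLearner that trains from that dataset.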
    def learner(self, replay: reverb.Client, counter: counting.Counter):
        """The learning part of the agent."""
        # Create the network from the environment's action spec.
        network = self._network_factory(self._env_spec.actions)

        # Create the network variables by feeding in the observation spec.
        tf2_utils.create_variables(network, [self._env_spec.observations])

        # The dataset object to learn from.
        dataset = datasets.make_reverb_dataset(
            server_address=replay.server_address,
            batch_size=self._batch_size,
            prefetch_size=self._prefetch_size)

        # Create the optimizer.
        optimizer = snt.optimizers.Adam(self._learning_rate)

        # Create and return the learner.
        return learning.AZLearner(
            network=network,
            discount=self._discount,
            dataset=dataset,
            optimizer=optimizer,
            counter=counter,
        )
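For reference, the method above relies on imports along these lines (a sketch based on the Acme codebase; the surrounding class, with its _network_factory, _env_spec and other attributes, is not shown here):

import reverb
import sonnet as snt

from acme import datasets
from acme.agents.tf.mcts import learning
from acme.tf import utils as tf2_utils
from acme.utils import counting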
Example #2
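This constructor is the single-process version of the same agent (it matches the MCTS class in acme.agents.tf.mcts). It builds every component in-process: a Reverb replay table and server, an N-step transition adder for writing experience, a dataset for sampling it back, an MCTS actor, and an AZLearner, which the parent agent.Agent class then ties together.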
  def __init__(
      self,
      network: snt.Module,
      model: models.Model,
      optimizer: snt.Optimizer,
      n_step: int,
      discount: float,
      replay_capacity: int,
      num_simulations: int,
      environment_spec: specs.EnvironmentSpec,
      batch_size: int,
  ):
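    """Builds the replay machinery, actor and learner for a single-process agent."""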

    # Create a replay server for storing transitions.
    replay_table = reverb.Table(
        name=adders.DEFAULT_PRIORITY_TABLE,
        sampler=reverb.selectors.Uniform(),
        remover=reverb.selectors.Fifo(),
        max_size=replay_capacity,
        rate_limiter=reverb.rate_limiters.MinSize(1))
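    # Passing port=None lets Reverb choose a free port automatically.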
    self._server = reverb.Server([replay_table], port=None)

    # The adder is used to insert observations into replay.
    address = f'localhost:{self._server.port}'
    adder = adders.NStepTransitionAdder(
        client=reverb.Client(address),
        n_step=n_step,
        discount=discount)

    # The dataset provides an interface to sample from replay.
    replay_client = reverb.TFClient(address)
    action_spec: specs.DiscreteArray = environment_spec.actions
    dataset = datasets.make_reverb_dataset(
        client=replay_client,
        environment_spec=environment_spec,
        extra_spec={
            'pi': specs.Array(
                shape=(action_spec.num_values,), dtype=np.float32)
        },
        transition_adder=True)

    # Batch the sampled transitions for learning.
    dataset = dataset.batch(batch_size, drop_remainder=True)

    # Create the network variables by feeding in the observation spec.
    tf2_utils.create_variables(network, [environment_spec.observations])

    # Now create the agent components: actor & learner.
    actor = acting.MCTSActor(
        environment_spec=environment_spec,
        model=model,
        network=network,
        discount=discount,
        adder=adder,
        num_simulations=num_simulations,
    )

    learner = learning.AZLearner(
        network=network,
        optimizer=optimizer,
        dataset=dataset,
        discount=discount,
    )

    # The parent class combines these together into one 'agent'.
    super().__init__(
        actor=actor,
        learner=learner,
        min_observations=10,
        observations_per_step=1,
    )
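To round out the example, here is a minimal sketch of how an agent built by this constructor is typically driven with Acme's standard environment loop. The make_environment helper and the network/model values are assumptions for illustration (MCTS refers to the class this constructor belongs to); acme.EnvironmentLoop, specs.make_environment_spec and snt.optimizers.Adam are real library calls.

import acme
import sonnet as snt
from acme import specs

# Hypothetical helper: returns a dm_env.Environment instance.
environment = make_environment()
environment_spec = specs.make_environment_spec(environment)

# 'network' and 'model' are assumed to be built elsewhere to match the spec.
agent = MCTS(
    network=network,
    model=model,
    optimizer=snt.optimizers.Adam(1e-3),
    n_step=5,
    discount=0.99,
    replay_capacity=10000,
    num_simulations=50,
    environment_spec=environment_spec,
    batch_size=16,
)

# The loop alternates acting (which feeds replay) and learning.
loop = acme.EnvironmentLoop(environment, agent)
loop.run(num_episodes=100)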