Example #1
    def build_replay_buffer(self, config):
        super().build_replay_buffer(config)
        policy = self.get_policy()
        self.virtual_replay = NumpyReplayBuffer(policy.observation_space,
                                                policy.action_space,
                                                config["virtual_buffer_size"])
        self.virtual_replay.seed(config["seed"])
Example #2
    def build_replay_buffer(self, config):
        """Construct replay buffer to hold samples."""
        policy = self.get_policy()
        self.replay = NumpyReplayBuffer(policy.observation_space,
                                        policy.action_space,
                                        config["buffer_size"])
        self.replay.seed(config["seed"])
Example #3
    def build_replay_buffer(self):
        super().build_replay_buffer()
        self.virtual_replay = NumpyReplayBuffer(
            self.observation_space,
            self.action_space,
            self.config["virtual_buffer_size"],
        )
        self.virtual_replay.seed(self.config["seed"])
Example #4
    def build_replay_buffer(self):
        """Construct the experience replay buffer.

        Should be called by subclasses on init.
        """
        self.replay = NumpyReplayBuffer(self.observation_space,
                                        self.action_space,
                                        self.config["buffer_size"])
        self.replay.seed(self.config["seed"])
Example #5
    def _init(self, config, env_creator):
        self._validate_config(config)
        self.workers = self._make_workers(env_creator,
                                          self._policy,
                                          config,
                                          num_workers=config["num_workers"])
        # Dummy optimizer to log stats since Trainer.collect_metrics is coupled with it
        self.optimizer = PolicyOptimizer(self.workers)

        policy = self.get_policy()
        policy.set_reward_from_config(config["env"], config["env_config"])

        self.replay = NumpyReplayBuffer(policy.observation_space,
                                        policy.action_space,
                                        config["buffer_size"])
        self.replay.add_fields(ReplayField(SampleBatch.ACTION_LOGP))
        self.replay.seed(config["seed"])
Example #6
def numpy_replay(obs_space, action_space, size, sample_batch):
    replay = NumpyReplayBuffer(obs_space, action_space, size)
    replay.add(sample_batch)
    return replay
Example #7
def replay(obs_space, action_space, samples):
    replay = NumpyReplayBuffer(obs_space, action_space, size=samples.count)
    replay.add(samples)
    return replay
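
The examples above only exercise the NumpyReplayBuffer constructor, seed, add, and add_fields. As a rough, self-contained sketch of how those same calls fit together, the snippet below builds a buffer from a toy batch of transitions. It is illustrative only: the import paths, the gym spaces, the SampleBatch field names, and the make_buffer_from_rollout helper are assumptions for this sketch, not part of the project the examples are drawn from.

import gym.spaces
import numpy as np
from ray.rllib.policy.sample_batch import SampleBatch

from raylab.utils.replay_buffer import NumpyReplayBuffer  # assumed import path


def make_buffer_from_rollout(num_transitions=10, seed=42):
    # Hypothetical continuous observation/action spaces.
    obs_space = gym.spaces.Box(low=-1.0, high=1.0, shape=(4,))
    action_space = gym.spaces.Box(low=-1.0, high=1.0, shape=(2,))

    # A toy batch of transitions; the field names assume the buffer stores
    # the standard RLlib transition fields.
    samples = SampleBatch({
        SampleBatch.CUR_OBS: np.random.randn(num_transitions, 4),
        SampleBatch.ACTIONS: np.random.randn(num_transitions, 2),
        SampleBatch.REWARDS: np.random.randn(num_transitions),
        SampleBatch.NEXT_OBS: np.random.randn(num_transitions, 4),
        SampleBatch.DONES: np.zeros(num_transitions, dtype=bool),
    })

    # Same calls as in the examples above: construct, seed, add.
    replay = NumpyReplayBuffer(obs_space, action_space, size=samples.count)
    replay.seed(seed)
    replay.add(samples)
    return replay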