def setUp(self) -> None:
    self.buffer = MultiStepBuffer(buffer_size=10, n_step=2)

    self.state = np.zeros([32, 32])
    self.state_02 = np.ones([32, 32])
    self.next_state = np.zeros([32, 32])
    self.next_state_02 = np.ones([32, 32])
    self.action = np.zeros([1])
    self.action_02 = np.ones([1])
    self.reward = np.zeros([1])
    self.reward_02 = np.ones([1])
    self.done = np.zeros([1])
    self.done_02 = np.zeros([1])

    self.experience01 = Experience(self.state, self.action, self.reward, self.done, self.next_state)
    self.experience02 = Experience(self.state_02, self.action_02, self.reward_02, self.done_02, self.next_state_02)
    self.experience03 = Experience(self.state_02, self.action_02, self.reward_02, self.done_02, self.next_state_02)
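# A minimal companion test sketch, not part of the original suite. It assumes
# MultiStepBuffer exposes append() and __len__(), and that with n_step=2 one
# aggregated transition becomes available only once two raw experiences have
# been appended (the exact accumulation semantics are an assumption).
def test_append_accumulates_n_step_transitions(self) -> None:
    self.buffer.append(self.experience01)  # n-step window not yet full
    self.assertEqual(len(self.buffer), 0)
    self.buffer.append(self.experience02)  # window full: one transition emitted
    self.assertEqual(len(self.buffer), 1)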
def setUp(self) -> None:
    self.buffer = PERBuffer(10)

    self.state = np.random.rand(32, 32)
    self.next_state = np.random.rand(32, 32)
    self.action = np.ones([1])
    self.reward = np.ones([1])
    self.done = np.zeros([1])
    self.experience = Experience(self.state, self.action, self.reward, self.done, self.next_state)
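# A companion sketch (an assumption, not from the original suite) exercising
# prioritized sampling. It assumes the usual PER API: append() plus a sample()
# that returns (samples, indices, weights) for importance-sampling correction.
def test_sample_returns_indices_and_weights(self) -> None:
    for _ in range(5):
        self.buffer.append(self.experience)
    samples, indices, weights = self.buffer.sample(batch_size=4)
    self.assertEqual(len(indices), 4)
    self.assertEqual(len(weights), 4)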
def setUp(self) -> None:
    self.net = Mock()
    self.agent = DummyAgent(net=self.net)
    self.env = gym.make("CartPole-v0")
    self.n_step = 2
    self.source = NStepExperienceSource(self.env, self.agent, Mock(), n_steps=self.n_step)

    self.state = np.zeros([32, 32])
    self.state_02 = np.ones([32, 32])
    self.next_state = np.zeros([32, 32])
    self.next_state_02 = np.ones([32, 32])
    self.action = np.zeros([1])
    self.action_02 = np.ones([1])
    self.reward = np.zeros([1])
    self.reward_02 = np.ones([1])
    self.done = np.zeros([1])
    self.done_02 = np.zeros([1])

    self.experience01 = Experience(self.state, self.action, self.reward, self.done, self.next_state)
    self.experience02 = Experience(self.state_02, self.action_02, self.reward_02, self.done_02, self.next_state_02)
    self.experience03 = Experience(self.state_02, self.action_02, self.reward_02, self.done_02, self.next_state_02)
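# A possible companion test (a sketch under assumptions): it assumes
# NStepExperienceSource.step() returns an (Experience, reward, done) tuple,
# matching the n-step step() implementation shown at the end of this section.
def test_step_returns_multi_step_experience(self) -> None:
    exp, reward, done = self.source.step()
    self.assertIsInstance(exp, Experience)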
def setUp(self) -> None:
    self.state = np.random.rand(32, 32)
    self.next_state = np.random.rand(32, 32)
    self.action = np.ones([1])
    self.reward = np.ones([1])
    self.done = np.zeros([1])
    self.experience = Experience(self.state, self.action, self.reward, self.done, self.next_state)
    self.source = Mock()
    self.source.step = Mock(return_value=(self.experience, torch.tensor(0), False))
    self.warm_start = 10
    self.buffer = ReplayBuffer(20)

    for _ in range(self.warm_start):
        self.buffer.append(self.experience)
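# A companion sketch (not from the original suite): it assumes ReplayBuffer
# implements __len__(), so the warm start performed in setUp is observable.
def test_warm_start_fills_buffer(self) -> None:
    self.assertEqual(len(self.buffer), self.warm_start)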
def setUp(self) -> None:
    self.state = np.random.rand(4, 84, 84)
    self.next_state = np.random.rand(4, 84, 84)
    self.action = np.ones([1])
    self.reward = np.ones([1])
    self.done = np.zeros([1])
    self.experience = Experience(self.state, self.action, self.reward, self.done, self.next_state)
    self.source = Mock()
    self.source.step = Mock(return_value=(self.experience, torch.tensor(0), False))
    self.batch_size = 8
    self.buffer = Buffer(8)

    for _ in range(self.batch_size):
        self.buffer.append(self.experience)
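# A companion sketch (assumption): the exact signature of Buffer.sample() is
# not shown in this section, so this assumes it returns the batched experience
# fields with a leading dimension equal to the number of stored experiences.
def test_sample_batch_shape(self) -> None:
    states, actions, rewards, dones, next_states = self.buffer.sample(self.batch_size)
    self.assertEqual(states.shape[0], self.batch_size)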
def step(self) -> Tuple[Experience, float, bool]: """Takes a single step through the environment""" action = self.agent(self.state, self.device) new_state, reward, done, _ = self.env.step(action) experience = Experience(state=self.state, action=action, reward=reward, new_state=new_state, done=done) self.state = new_state if done: self.state = self.env.reset() return experience, reward, done
def step(self) -> Experience: """Carries out a single step in the environment""" action = self.agent(self.state, self.device) new_state, reward, done, _ = self.env.step(action) experience = Experience(state=self.state, action=action, reward=reward, new_state=new_state, done=done) self.state = new_state if done: self.state = self.env.reset() return experience
def step(self) -> Tuple[Experience, float, bool]: """ Takes an n-step in the environment Returns: Experience """ exp = self.single_step() while len(self.n_step_buffer) < self.n_steps: self.single_step() reward, next_state, done = self.get_transition_info() first_experience = self.n_step_buffer[0] multi_step_experience = Experience(first_experience.state, first_experience.action, reward, done, next_state) return multi_step_experience, exp.reward, exp.done