Example 1
    def setUp(self) -> None:
        """Build a 2-step buffer plus canned transition data and three experiences."""
        self.buffer = MultiStepBuffer(buffer_size=10, n_step=2)

        # Paired zero/one fixtures: plain fields vs their *_02 variants.
        self.state, self.next_state = np.zeros([32, 32]), np.zeros([32, 32])
        self.state_02, self.next_state_02 = np.ones([32, 32]), np.ones([32, 32])
        self.action, self.reward, self.done = np.zeros([1]), np.zeros([1]), np.zeros([1])
        self.action_02, self.reward_02 = np.ones([1]), np.ones([1])
        self.done_02 = np.zeros([1])

        self.experience01 = Experience(
            self.state, self.action, self.reward, self.done, self.next_state)
        # experience02 and experience03 carry identical data but are distinct objects.
        self.experience02 = Experience(
            self.state_02, self.action_02, self.reward_02, self.done_02, self.next_state_02)
        self.experience03 = Experience(
            self.state_02, self.action_02, self.reward_02, self.done_02, self.next_state_02)
Example 2
    def setUp(self) -> None:
        """Create a size-10 prioritized replay buffer and one random experience."""
        self.buffer = PERBuffer(10)

        # Random 32x32 observations; unit action/reward, not-done flag.
        self.state = np.random.rand(32, 32)
        self.next_state = np.random.rand(32, 32)
        self.action, self.reward = np.ones([1]), np.ones([1])
        self.done = np.zeros([1])
        self.experience = Experience(
            self.state, self.action, self.reward, self.done, self.next_state)
Example 3
    def setUp(self) -> None:
        """Wire a mocked agent to CartPole behind a 2-step source, plus canned data."""
        self.net = Mock()
        self.agent = DummyAgent(net=self.net)
        self.env = gym.make("CartPole-v0")
        self.n_step = 2
        self.source = NStepExperienceSource(
            self.env, self.agent, Mock(), n_steps=self.n_step)

        # Paired zero/one fixtures: plain fields vs their *_02 variants.
        self.state, self.next_state = np.zeros([32, 32]), np.zeros([32, 32])
        self.state_02, self.next_state_02 = np.ones([32, 32]), np.ones([32, 32])
        self.action, self.reward, self.done = np.zeros([1]), np.zeros([1]), np.zeros([1])
        self.action_02, self.reward_02 = np.ones([1]), np.ones([1])
        self.done_02 = np.zeros([1])

        self.experience01 = Experience(
            self.state, self.action, self.reward, self.done, self.next_state)
        # experience02 and experience03 carry identical data but are distinct objects.
        self.experience02 = Experience(
            self.state_02, self.action_02, self.reward_02, self.done_02, self.next_state_02)
        self.experience03 = Experience(
            self.state_02, self.action_02, self.reward_02, self.done_02, self.next_state_02)
Example 4
    def setUp(self) -> None:
        """Prepare one random experience, a mocked source, and a warm-started buffer."""
        self.state = np.random.rand(32, 32)
        self.next_state = np.random.rand(32, 32)
        self.action, self.reward = np.ones([1]), np.ones([1])
        self.done = np.zeros([1])
        self.experience = Experience(
            self.state, self.action, self.reward, self.done, self.next_state)

        # Source always yields the same experience with zero reward and not-done.
        self.source = Mock()
        self.source.step = Mock(return_value=(self.experience, torch.tensor(0), False))
        self.warm_start = 10
        self.buffer = ReplayBuffer(20)
        # Pre-fill the buffer up to the warm-start threshold.
        filled = 0
        while filled < self.warm_start:
            self.buffer.append(self.experience)
            filled += 1
Example 5
    def setUp(self) -> None:
        """Prepare one random frame-stack experience and a buffer filled to batch size."""
        # 4-channel 84x84 observations (Atari-style frame stacks).
        self.state = np.random.rand(4, 84, 84)
        self.next_state = np.random.rand(4, 84, 84)
        self.action, self.reward = np.ones([1]), np.ones([1])
        self.done = np.zeros([1])
        self.experience = Experience(
            self.state, self.action, self.reward, self.done, self.next_state)
        # Source always yields the same experience with zero reward and not-done.
        self.source = Mock()
        self.source.step = Mock(return_value=(self.experience, torch.tensor(0), False))
        self.batch_size = 8
        self.buffer = Buffer(8)

        # Fill the buffer with exactly one batch worth of identical experiences.
        filled = 0
        while filled < self.batch_size:
            self.buffer.append(self.experience)
            filled += 1
Example 6
    def step(self) -> Tuple[Experience, float, bool]:
        """Advance the environment one step.

        Returns:
            the recorded transition, the raw reward, and the done flag
        """
        chosen_action = self.agent(self.state, self.device)
        next_obs, step_reward, episode_over, _ = self.env.step(chosen_action)
        transition = Experience(
            state=self.state,
            action=chosen_action,
            reward=step_reward,
            new_state=next_obs,
            done=episode_over,
        )
        # Reset on episode end; otherwise continue from the new observation.
        self.state = self.env.reset() if episode_over else next_obs

        return transition, step_reward, episode_over
Example 7
    def step(self) -> Experience:
        """Advance the environment one step and return the recorded transition."""
        chosen_action = self.agent(self.state, self.device)
        next_obs, step_reward, episode_over, _ = self.env.step(chosen_action)
        transition = Experience(
            state=self.state,
            action=chosen_action,
            reward=step_reward,
            new_state=next_obs,
            done=episode_over,
        )
        # Reset on episode end; otherwise continue from the new observation.
        self.state = self.env.reset() if episode_over else next_obs

        return transition
Example 8
    def step(self) -> Tuple[Experience, float, bool]:
        """Take an n-step transition in the environment.

        Returns:
            the aggregated multi-step experience, plus the reward and done flag
            of the first single step taken
        """
        head = self.single_step()

        # Keep stepping until the n-step window is full.
        while len(self.n_step_buffer) < self.n_steps:
            self.single_step()

        # Collapse the window: accumulated reward, final state, terminal flag.
        accumulated_reward, last_state, terminal = self.get_transition_info()
        oldest = self.n_step_buffer[0]
        aggregated = Experience(
            oldest.state, oldest.action, accumulated_reward, terminal, last_state)

        return aggregated, head.reward, head.done