Example #1
    def step(self, state, random=False):
        # Pick an action: uniformly at random for exploration, otherwise from the policy.
        if random:
            action = self.agent.get_random_action()
        else:
            action = self.agent.act(state)
        # Advance the environment and compute the reward for this transition.
        next_state = self.physics.step(GridActions(action))
        reward = self.rewards.calculate_reward(state, GridActions(action), next_state)
        # Record the transition for training and for statistics logging.
        self.trainer.add_experience(state, action, reward, next_state)
        self.stats.add_experience((state, action, reward, copy.deepcopy(next_state)))
        self.step_count += 1
        # Return a snapshot so later in-place mutation by the physics cannot corrupt it.
        return copy.deepcopy(next_state)
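The deep copies above only matter if the physics engine mutates and returns one shared state object; in that case anything kept for later (a replay buffer, the caller's loop variable) must be snapshotted first. A minimal, self-contained sketch of that failure mode, using toy stand-ins rather than the project's classes:

import copy

class MutatingPhysics:
    """Toy engine that mutates its state in place and returns the same instance."""
    def __init__(self):
        self.state = {"pos": 0}

    def step(self, action):
        self.state["pos"] += action
        return self.state  # shared object, not a fresh one

physics = MutatingPhysics()
buffer = []
for action in (1, 1, 1):
    next_state = physics.step(action)
    # Without the deepcopy, all three buffer entries would alias the same dict.
    buffer.append(copy.deepcopy(next_state))

assert [b["pos"] for b in buffer] == [1, 2, 3]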
Example #2
    def test_episode(self, scenario=None):
        state = copy.deepcopy(self.init_episode(scenario))
        self.stats.on_episode_begin(self.episode_count)
        while not state.terminal:
            # Evaluation uses the exploitation (greedy) action only; no exploration.
            action = self.agent.get_exploitation_action_target(state)
            next_state = self.physics.step(GridActions(action))
            reward = self.rewards.calculate_reward(state, GridActions(action), next_state)
            # Transitions are logged for statistics only; nothing is trained here.
            self.stats.add_experience((copy.deepcopy(state), action, reward, copy.deepcopy(next_state)))
            state = copy.deepcopy(next_state)

        self.stats.on_episode_end(self.episode_count)
        self.stats.log_testing_data(step=self.step_count)
Example #3
    def test_scenario(self, scenario):
        state = copy.deepcopy(self.init_episode(scenario))
        # Round-robin over the agents until every agent has finished.
        while not state.all_terminal:
            for state.active_agent in range(state.num_agents):
                if state.terminal:
                    continue  # this agent is already done; skip its turn
                action = self.agent.get_exploitation_action_target(state)
                state = self.physics.step(GridActions(action))
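The loop header here relies on a less common Python feature: a for-loop target can be any assignment target, including an attribute reference, so each iteration assigns the agent index into state.active_agent before the body runs. A minimal, self-contained demonstration of just that idiom:

class Cursor:
    pass

c = Cursor()
collected = []
# Each iteration performs the assignment c.index = value before the loop body.
for c.index in range(3):
    collected.append(c.index)

assert collected == [0, 1, 2]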
Example #4
    def step(self, state: DHState, random=False):
        # Round-robin over the agents; the for-loop target assigns
        # state.active_agent, so per-agent attributes such as state.terminal
        # refer to the currently active agent.
        for state.active_agent in range(state.num_agents):
            if state.terminal:
                continue
            if random:
                action = self.agent.get_random_action()
            else:
                action = self.agent.act(state)
            # Deferred experience: complete this agent's previous transition,
            # whose next_state is only known now that the other agents have
            # taken their turns in between.
            if not self.first_action:
                reward = self.rewards.calculate_reward(
                    self.last_states[state.active_agent],
                    GridActions(self.last_actions[state.active_agent]), state)
                self.trainer.add_experience(
                    self.last_states[state.active_agent],
                    self.last_actions[state.active_agent], reward, state)
                self.stats.add_experience(
                    (self.last_states[state.active_agent],
                     self.last_actions[state.active_agent], reward,
                     copy.deepcopy(state)))

            # Snapshot the pre-step state and chosen action, then advance.
            self.last_states[state.active_agent] = copy.deepcopy(state)
            self.last_actions[state.active_agent] = action
            state = self.physics.step(GridActions(action))
            # If this agent just terminated, there is no later turn in which to
            # close the transition, so flush it immediately.
            if state.terminal:
                reward = self.rewards.calculate_reward(
                    self.last_states[state.active_agent],
                    GridActions(self.last_actions[state.active_agent]), state)
                self.trainer.add_experience(
                    self.last_states[state.active_agent],
                    self.last_actions[state.active_agent], reward, state)
                self.stats.add_experience(
                    (self.last_states[state.active_agent],
                     self.last_actions[state.active_agent], reward,
                     copy.deepcopy(state)))

        self.step_count += 1
        self.first_action = False
        return state
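The last_states/last_actions bookkeeping is the heart of this pattern: in a turn-based multi-agent loop, the transition (s, a, r, s') for agent i can only be completed on agent i's next turn (or when it terminates), because the reward depends on everything that happened in between. A self-contained toy version of just that bookkeeping, with hypothetical names and a dummy policy (the terminal-flush branch from the example above is omitted for brevity):

import copy
from dataclasses import dataclass

@dataclass
class ToyState:
    num_agents: int = 2
    active_agent: int = 0
    steps: int = 0

    @property
    def all_terminal(self):
        return self.steps >= 4

def toy_physics_step(state):
    # Return a fresh state advanced by one step.
    nxt = copy.deepcopy(state)
    nxt.steps += 1
    return nxt

last_states, last_actions, experiences = {}, {}, []
state = ToyState()
first_action = True
while not state.all_terminal:
    for state.active_agent in range(state.num_agents):
        action = state.active_agent  # dummy policy
        if not first_action:
            # Close out this agent's previous transition: the current state is
            # its next_state, reached after the other agents' turns.
            experiences.append((last_states[state.active_agent],
                                last_actions[state.active_agent],
                                copy.deepcopy(state)))
        last_states[state.active_agent] = copy.deepcopy(state)
        last_actions[state.active_agent] = action
        state = toy_physics_step(state)
    first_action = False

# Two agents, two rounds: each agent's first transition is completed in round two.
assert len(experiences) == 2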
Example #5
    def test_episode(self):
        state = copy.deepcopy(self.init_episode())
        self.stats.on_episode_begin(self.episode_count)
        first_action = True
        while not state.all_terminal:
            for state.active_agent in range(state.num_agents):
                if state.terminal:
                    continue
                # Greedy evaluation action; no exploration during testing.
                action = self.agent.get_exploitation_action_target(state)
                # Complete this agent's previous transition (deferred, as in
                # training), but record it for statistics only.
                if not first_action:
                    reward = self.rewards.calculate_reward(
                        self.last_states[state.active_agent],
                        GridActions(self.last_actions[state.active_agent]),
                        state)
                    self.stats.add_experience(
                        (self.last_states[state.active_agent],
                         self.last_actions[state.active_agent], reward,
                         copy.deepcopy(state)))

                self.last_states[state.active_agent] = copy.deepcopy(state)
                self.last_actions[state.active_agent] = action
                state = self.physics.step(GridActions(action))
                # Flush the final transition for an agent that just terminated.
                if state.terminal:
                    reward = self.rewards.calculate_reward(
                        self.last_states[state.active_agent],
                        GridActions(self.last_actions[state.active_agent]),
                        state)
                    self.stats.add_experience(
                        (self.last_states[state.active_agent],
                         self.last_actions[state.active_agent], reward,
                         copy.deepcopy(state)))

            first_action = False

        self.stats.on_episode_end(self.episode_count)
        self.stats.log_testing_data(step=self.step_count)
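This evaluation loop mirrors the multi-agent step() of Example #4 with two differences: actions always come from get_exploitation_action_target rather than an exploration choice, and completed transitions go only to the statistics logger, never to the trainer.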
Example #6
    def test_scenario(self, scenario):
        state = copy.deepcopy(self.init_episode(scenario))
        # Single-agent greedy rollout; nothing is logged or trained.
        while not state.terminal:
            action = self.agent.get_exploitation_action_target(state)
            state = self.physics.step(GridActions(action))