def step(self, state, random=False):
    # Select an action, either uniformly at random (e.g. for replay-buffer
    # warm-up) or from the agent's current policy.
    if random:
        action = self.agent.get_random_action()
    else:
        action = self.agent.act(state)
    next_state = self.physics.step(GridActions(action))
    reward = self.rewards.calculate_reward(state, GridActions(action), next_state)
    # Store the transition both for training and for statistics logging.
    self.trainer.add_experience(state, action, reward, next_state)
    self.stats.add_experience((state, action, reward, copy.deepcopy(next_state)))
    self.step_count += 1
    return copy.deepcopy(next_state)

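# Usage sketch (not part of the original file): one way a training loop could
# drive step() above. init_episode, stats, step_count, and episode_count come
# from this class; trainer.train_agent is an ASSUMED training hook, not a
# confirmed API of this repo.
def run_training_episode(self, random=False):
    state = copy.deepcopy(self.init_episode())
    self.stats.on_episode_begin(self.episode_count)
    while not state.terminal:
        state = self.step(state, random=random)
        self.trainer.train_agent()  # assumed: one gradient step per environment step
    self.stats.on_episode_end(self.episode_count)
    self.episode_count += 1
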
def test_episode(self, scenario=None):
    state = copy.deepcopy(self.init_episode(scenario))
    self.stats.on_episode_begin(self.episode_count)
    while not state.terminal:
        action = self.agent.get_exploitation_action_target(state)
        next_state = self.physics.step(GridActions(action))
        reward = self.rewards.calculate_reward(state, GridActions(action), next_state)
        self.stats.add_experience((copy.deepcopy(state), action, reward, copy.deepcopy(next_state)))
        state = copy.deepcopy(next_state)
    self.stats.on_episode_end(self.episode_count)
    self.stats.log_testing_data(step=self.step_count)

def test_scenario(self, scenario):
    state = copy.deepcopy(self.init_episode(scenario))
    while not state.all_terminal:
        # Round-robin over the agents, skipping any that have already terminated.
        for state.active_agent in range(state.num_agents):
            if state.terminal:
                continue
            action = self.agent.get_exploitation_action_target(state)
            state = self.physics.step(GridActions(action))

def step(self, state: DHState, random=False):
    for state.active_agent in range(state.num_agents):
        if state.terminal:
            continue
        if random:
            action = self.agent.get_random_action()
        else:
            action = self.agent.act(state)
        if not self.first_action:
            # The reward for this agent's previous action can only be computed
            # now, after the intermediate moves of the other agents are known.
            reward = self.rewards.calculate_reward(self.last_states[state.active_agent],
                                                   GridActions(self.last_actions[state.active_agent]),
                                                   state)
            self.trainer.add_experience(self.last_states[state.active_agent],
                                        self.last_actions[state.active_agent],
                                        reward, state)
            self.stats.add_experience((self.last_states[state.active_agent],
                                       self.last_actions[state.active_agent],
                                       reward, copy.deepcopy(state)))
        self.last_states[state.active_agent] = copy.deepcopy(state)
        self.last_actions[state.active_agent] = action
        state = self.physics.step(GridActions(action))
        if state.terminal:
            # The agent terminated on this move; flush its pending transition
            # immediately, since it will be skipped in all following rounds.
            reward = self.rewards.calculate_reward(self.last_states[state.active_agent],
                                                   GridActions(self.last_actions[state.active_agent]),
                                                   state)
            self.trainer.add_experience(self.last_states[state.active_agent],
                                        self.last_actions[state.active_agent],
                                        reward, state)
            self.stats.add_experience((self.last_states[state.active_agent],
                                       self.last_actions[state.active_agent],
                                       reward, copy.deepcopy(state)))
    self.step_count += 1
    self.first_action = False
    return state

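# Sketch (assumption, not original code) of the per-agent bookkeeping that the
# multi-agent step() above relies on, presumably reset in init_episode or a
# similar setup method. last_states/last_actions hold one pending transition
# per agent; first_action defers reward computation until every agent has
# acted once, because an agent's next state is only known after the other
# agents have moved.
def reset_multi_agent_buffers(self, state: DHState):
    self.last_states = [None] * state.num_agents
    self.last_actions = [None] * state.num_agents
    self.first_action = True
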
def test_episode(self):
    state = copy.deepcopy(self.init_episode())
    self.stats.on_episode_begin(self.episode_count)
    first_action = True
    while not state.all_terminal:
        # Greedy test rollout with the same delayed-reward bookkeeping as step(),
        # but without feeding experiences to the trainer.
        for state.active_agent in range(state.num_agents):
            if state.terminal:
                continue
            action = self.agent.get_exploitation_action_target(state)
            if not first_action:
                reward = self.rewards.calculate_reward(self.last_states[state.active_agent],
                                                       GridActions(self.last_actions[state.active_agent]),
                                                       state)
                self.stats.add_experience((self.last_states[state.active_agent],
                                           self.last_actions[state.active_agent],
                                           reward, copy.deepcopy(state)))
            self.last_states[state.active_agent] = copy.deepcopy(state)
            self.last_actions[state.active_agent] = action
            state = self.physics.step(GridActions(action))
            if state.terminal:
                reward = self.rewards.calculate_reward(self.last_states[state.active_agent],
                                                       GridActions(self.last_actions[state.active_agent]),
                                                       state)
                self.stats.add_experience((self.last_states[state.active_agent],
                                           self.last_actions[state.active_agent],
                                           reward, copy.deepcopy(state)))
        first_action = False
    self.stats.on_episode_end(self.episode_count)
    self.stats.log_testing_data(step=self.step_count)

def test_scenario(self, scenario):
    state = copy.deepcopy(self.init_episode(scenario))
    while not state.terminal:
        action = self.agent.get_exploitation_action_target(state)
        state = self.physics.step(GridActions(action))

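# Usage sketch (assumed helper, not original code): evaluating a list of saved
# scenarios with the greedy target policy via test_scenario() above. Metric
# collection is left to the stats callbacks used elsewhere in this class.
def evaluate_scenarios(self, scenarios):
    for scenario in scenarios:
        self.test_scenario(scenario)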