def test_action(self, obs):
    """Pick an action for ``obs`` via the controller network's ``action()``.

    The observation is converted with
    ``handcraft_function.get_all_observation`` and stored as the
    controller's ``current_state``. The network output is then passed
    through ``handcraft_function.reflect`` and
    ``handcraft_function.assembly_action``; the latter's result is returned.
    """
    ctrl = self.controller
    ctrl.current_state = handcraft_function.get_all_observation(obs)

    # Read the state back from the controller, as the stored value is what
    # the network is queried with.
    network_choice = ctrl.network.action(ctrl.current_state)

    macro = handcraft_function.reflect(obs, network_choice)
    return handcraft_function.assembly_action(obs, self.index, macro)
 def test_action(self, obs):
     """Pick an action for ``obs`` via the controller network's ``action()``.

     Stores ``handcraft_function.get_all_observation(obs)`` as the
     controller's ``current_state``, queries the network with it, and maps
     the output through ``handcraft_function.reflect`` (given the length of
     ``sa.attack_controller``) before building the returned action with
     ``handcraft_function.assembly_action``.
     """
     ctrl = self.controller
     ctrl.current_state = handcraft_function.get_all_observation(obs)

     network_choice = ctrl.network.action(ctrl.current_state)

     # reflect() receives the number of attack controllers, not the
     # observation itself.
     controller_count = len(sa.attack_controller)
     macro = handcraft_function.reflect(controller_count, network_choice)
     return handcraft_function.assembly_action(obs, self.index, macro)
Beispiel #3
0
 def train_action(self, obs):
     """Feed the last transition to the network and pick a controller.

     Stores ``handcraft_function.get_all_observation(obs)`` as the top
     decision maker's ``current_state``. If a previous action is recorded,
     the stored (state, action, reward) plus the new state and
     ``obs.last()`` are handed to ``network.perceive``. The value returned
     by ``network.egreedy_action`` is remembered and returned.
     """
     tdm = self.top_decision_maker
     tdm.current_state = handcraft_function.get_all_observation(obs)

     if tdm.previous_action is not None:
         # NOTE(review): previous_reward was read from the same observation
         # that produced previous_state, not from the current `obs` --
         # confirm this pairing is what perceive() expects.
         transition = (
             tdm.previous_state,
             tdm.previous_action,
             tdm.previous_reward,
             tdm.current_state,
             obs.last(),
         )
         tdm.network.perceive(*transition)

     chosen_controller = tdm.network.egreedy_action(tdm.current_state)

     # Remember this step so the next call can report it to the network.
     tdm.previous_reward = obs.reward
     tdm.previous_state = tdm.current_state
     tdm.previous_action = chosen_controller
     return chosen_controller
 def train_action(self, obs):
     """Feed the last transition to the network and build the next action.

     Stores ``handcraft_function.get_all_observation(obs)`` as the
     controller's ``current_state``. If a previous action is recorded, the
     stored (state, action, reward) plus the new state and ``obs.last()``
     are handed to ``network.perceive``. The output of
     ``network.egreedy_action`` is remembered as ``previous_action`` and
     then mapped through ``handcraft_function.reflect`` and
     ``handcraft_function.assembly_action`` to produce the return value.
     """
     ctrl = self.controller
     ctrl.current_state = handcraft_function.get_all_observation(obs)

     if ctrl.previous_action is not None:
         # NOTE(review): previous_reward was read from the same observation
         # that produced previous_state, not from the current `obs` --
         # confirm this pairing is what perceive() expects.
         transition = (
             ctrl.previous_state,
             ctrl.previous_action,
             ctrl.previous_reward,
             ctrl.current_state,
             obs.last(),
         )
         ctrl.network.perceive(*transition)

     network_choice = ctrl.network.egreedy_action(ctrl.current_state)

     # The network's own output (before reflect) is what gets remembered.
     ctrl.previous_reward = obs.reward
     ctrl.previous_state = ctrl.current_state
     ctrl.previous_action = network_choice

     macro = handcraft_function.reflect(obs, network_choice)
     return handcraft_function.assembly_action(obs, self.index, macro)
    def train_action(self, obs):
        """Feed the last transition to the network and build the next action.

        Stores ``handcraft_function.get_all_observation(obs)`` as the
        controller's ``current_state``. If a previous action is recorded,
        the stored (state, action, reward) plus the new state and
        ``obs.last()`` are handed to ``network.perceive``. The output of
        ``network.egreedy_action`` is mapped through
        ``handcraft_function.reflect`` (given the length of
        ``sa.attack_controller``); that mapped value is remembered as
        ``previous_action`` and passed to
        ``handcraft_function.assembly_action`` to produce the return value.
        The remembered reward comes from ``reward_compute_1(obs)``.
        """
        ctrl = self.controller
        ctrl.current_state = handcraft_function.get_all_observation(obs)

        if ctrl.previous_action is not None:
            # NOTE(review): previous_reward was computed from the same
            # observation that produced previous_state, not from the
            # current `obs` -- confirm this pairing is what perceive()
            # expects.
            transition = (
                ctrl.previous_state,
                ctrl.previous_action,
                ctrl.previous_reward,
                ctrl.current_state,
                obs.last(),
            )
            ctrl.network.perceive(*transition)

        network_choice = ctrl.network.egreedy_action(ctrl.current_state)

        ctrl.previous_reward = reward_compute_1(obs)
        ctrl.previous_state = ctrl.current_state

        # Here the value remembered for the next perceive() call is the
        # output of reflect(), not the network's own output.
        controller_count = len(sa.attack_controller)
        macro = handcraft_function.reflect(controller_count, network_choice)
        ctrl.previous_action = macro

        return handcraft_function.assembly_action(obs, self.index, macro)
Beispiel #6
0
 def test_action(self, obs):
     """Return the top decision maker network's ``action()`` for ``obs``.

     The observation is converted with
     ``handcraft_function.get_all_observation`` and stored as the top
     decision maker's ``current_state`` before the network is queried.
     """
     tdm = self.top_decision_maker
     tdm.current_state = handcraft_function.get_all_observation(obs)
     return tdm.network.action(tdm.current_state)