Example #1
    def step(self, obs, world_state):
        super(RandomAgent, self).step(obs)

        selected_actions = []
        if world_state:
            pIDs = world_state.get_player_ids()
            if len(pIDs) == 5:
                for player_id in pIDs:
                    function_id = numpy.random.choice(
                        obs.observation['available_actions'][player_id])
                    print(
                        'RandomAgent chose random action: %d for player_id %d'
                        % (function_id, player_id))
                    if function_id == 3:
                        ability_ids = world_state.get_player_ability_ids(
                            player_id,
                            True)  # TODO - remove this flag when implemented
                        if len(ability_ids) > 0:
                            rand = numpy.random.randint(0, len(ability_ids))
                            name = ability_ids[rand]
                            print(
                                'PID: %d, Rand: %d, RandName: %s, AbilityIDS: %s'
                                % (player_id, rand, name, str(ability_ids)))
                            args = [[name]]
                        else:
                            args = [[0]]
                    elif function_id == 4:
                        rand = loc.Location.uniform_rand()
                        scaled_rand = rand.scale(
                            world_state.player_data[player_id].
                            get_reachable_distance())
                        curr_loc = world_state.player_data[
                            player_id].get_location()
                        new_loc = curr_loc + scaled_rand
                        args = [[new_loc.x, new_loc.y, new_loc.z], [0]]

                        # TODO - implement move action to use normal/push/queued styles
                    else:
                        args = [[
                            numpy.random.randint(0, size) for size in arg.sizes
                        ] for arg in
                                self.action_spec.functions[function_id].args]
                    selected_actions.append(
                        actions.FunctionCall(player_id, function_id, args))

                # now add team-wide functions (we use pid = 0)
                if len(obs.observation['available_actions'][0]) > 0:
                    function_id = numpy.random.choice(
                        obs.observation['available_actions'][0])
                    print('RandomAgent chose random action: %d for the team' %
                          (function_id))
                    args = [[
                        numpy.random.randint(0, size) for size in arg.sizes
                    ] for arg in self.action_spec.functions[function_id].args]
                    selected_actions.append(
                        actions.FunctionCall(0, function_id, args))

        #print("RandomAgent selected actions:", selected_actions)
        return selected_actions
Example #2
    def step(self, obs):
        self.steps += 1
        self.reward += obs.reward
        return actions.FunctionCall(-1, 0, [])
Example #3
    def step(self, obs, world_state):
        super(MoveAgent, self).step(obs)

        #if self.steps >= 300:
        #    self.qlearn.dump_table()
        #    self._state = environment.StepType.LAST

        if not world_state:
            return []

        pids = world_state.get_player_ids()
        if len(pids) < 5:
            return []

        selected_actions = []
        for pid in pids:
            player = world_state.get_player_by_id(pid)
            player_loc = player.get_location()
            dist_to_loc = player_loc.dist(self.dest_loc)

            # initialize our tracking variables on this player's first valid step
            if pid not in self.previous_dist:
                self.previous_dist[pid] = dist_to_loc
                self.previous_action[pid] = None
                self.previous_state[pid] = None

            loc_delta = self.dest_loc - player_loc
            # atan2 gives degrees in (-180, 180]; normalize into [0, 360)
            # before bucketing into one of 8 compass facings
            desired_degree_facing = math.degrees(
                math.atan2(loc_delta.y, loc_delta.x)) % 360.0

            if desired_degree_facing < 22.5 or desired_degree_facing >= (
                    360.0 - 22.5):
                desired_degree_facing = 0
            elif desired_degree_facing < (
                    45.0 + 22.5) and desired_degree_facing >= 22.5:
                desired_degree_facing = 45
            elif desired_degree_facing < (
                    90.0 + 22.5) and desired_degree_facing >= (90.0 - 22.5):
                desired_degree_facing = 90
            elif desired_degree_facing < (
                    135.0 + 22.5) and desired_degree_facing >= (135.0 - 22.5):
                desired_degree_facing = 135
            elif desired_degree_facing < (
                    180.0 + 22.5) and desired_degree_facing >= (180.0 - 22.5):
                desired_degree_facing = 180
            elif desired_degree_facing < (
                    225.0 + 22.5) and desired_degree_facing >= (225.0 - 22.5):
                desired_degree_facing = 225
            elif desired_degree_facing < (
                    270.0 + 22.5) and desired_degree_facing >= (270.0 - 22.5):
                desired_degree_facing = 270
            elif desired_degree_facing < (
                    315.0 + 22.5) and desired_degree_facing >= (315.0 - 22.5):
                desired_degree_facing = 315
            else:
                raise Exception("Bad Desired Angle: %f" %
                                desired_degree_facing)

            # discretize our location to a square cell (_CELL_GRID_SIZE units wide and tall)
            x_grid = int(player_loc.x / _CELL_GRID_SIZE)
            y_grid = int(player_loc.y / _CELL_GRID_SIZE)

            # estimated state space size: 8 * (14000 x 14000) / (_CELL_GRID_SIZE * _CELL_GRID_SIZE)
            # example: 156,800 with _CELL_GRID_SIZE == 100.0
            current_state = np.zeros(3)
            current_state[
                0] = x_grid  # (14,000 / _CELL_GRID_SIZE) x_grid values
            current_state[
                1] = y_grid  # (14,000 / _CELL_GRID_SIZE) y_grid values
            current_state[2] = desired_degree_facing  # 8 facing values

            # with 156,800 states and 11 possible actions we estimate our full
            # model contains 1,724,800 state-action nodes

            # if we previously took an action, evaluate its reward
            if self.previous_action[pid] is not None:
                reward = 0

                if dist_to_loc < 50:
                    reward += ARRIVED_AT_LOCATION_REWARD
                    self._state = environment.StepType.LAST
                elif dist_to_loc < self.previous_dist[pid]:
                    reward += TIME_STEP_CLOSER_REWARD
                elif dist_to_loc == self.previous_dist[pid]:
                    reward += TIME_STEP_REWARD
                else:
                    reward += DEFAULT_REWARD

                # update our learning model with the reward for that action
                print(
                    "From State '%s' took Action '%s' and got '%f' reward arriving at new_state '%s'"
                    % (self.previous_state[pid], self.previous_action[pid],
                       reward, current_state))
                print("Prev Dist was '%f', New Dist is '%f'" %
                      (self.previous_dist[pid], dist_to_loc))
                self.qlearn.learn(str(self.previous_state[pid]),
                                  self.previous_action[pid], reward,
                                  str(current_state))

            # choose an action to take given our learning model
            rl_action = self.qlearn.choose_action(str(current_state))
            smart_action = smart_actions[rl_action]

            self.previous_dist[pid] = dist_to_loc
            self.previous_state[pid] = current_state
            self.previous_action[pid] = rl_action

            degrees = 0
            if '_' in smart_action:
                smart_action, degrees = smart_action.split('_')
                degrees = int(degrees)

            if smart_action == ACTION_DO_NOTHING:
                selected_actions.append(
                    actions.FunctionCall(pid, _HERO_NO_OP, []))

            elif smart_action == ACTION_CLEAR_ACTION:
                selected_actions.append(
                    actions.FunctionCall(pid, _HERO_CLEAR_ACTION, [[0]]))

            elif smart_action == ACTION_CLEAR_ACTION_STOP:
                selected_actions.append(
                    actions.FunctionCall(pid, _HERO_CLEAR_ACTION, [[1]]))

            elif smart_action == ACTION_MOVE:
                if _HERO_MOVE_TO_LOCATION in obs.observation[
                        "available_actions"][pid]:
                    selected_actions.append(
                        actions.FunctionCall(pid, _HERO_MOVE_TO_LOCATION, [
                            player.max_reachable_location(degrees), _NOT_QUEUED
                        ]))
            else:
                selected_actions.append(
                    actions.FunctionCall(pid, _HERO_NO_OP, []))

        return selected_actions
Example #4
def func_call(func_id, args):
    return actions.FunctionCall(func_id, [[int(v) for v in a] for a in args])
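A minimal usage sketch for the helper above (not from the source): the function id and argument values are hypothetical, and the snippet only illustrates that func_call coerces every argument value to int before building the FunctionCall.

# hypothetical function id and raw argument values, chosen for illustration
call = func_call(12, [["3"], [10.5, 20.0]])
# equivalent to actions.FunctionCall(12, [[3], [10, 20]])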