def step(self, obs, world_state):
    super(RandomAgent, self).step(obs)
    selected_actions = []
    if world_state:
        pIDs = world_state.get_player_ids()
        if len(pIDs) == 5:
            for player_id in pIDs:
                function_id = numpy.random.choice(
                    obs.observation['available_actions'][player_id])
                print('RandomAgent chose random action: %d for player_id %d' %
                      (function_id, player_id))
                if function_id == 3:
                    ability_ids = world_state.get_player_ability_ids(
                        player_id, True)  # TODO - remove False when implemented
                    if len(ability_ids) > 0:
                        rand = numpy.random.randint(0, len(ability_ids))
                        name = ability_ids[rand]
                        print('PID: %d, Rand: %d, RandName: %s, AbilityIDS: %s' %
                              (player_id, rand, name, str(ability_ids)))
                        args = [[name]]
                    else:
                        args = [[0]]
                elif function_id == 4:
                    rand = loc.Location.uniform_rand()
                    scaled_rand = rand.scale(
                        world_state.player_data[player_id].get_reachable_distance())
                    curr_loc = world_state.player_data[player_id].get_location()
                    new_loc = curr_loc + scaled_rand
                    # TODO - implement move action to use normal/push/queued styles
                    args = [[new_loc.x, new_loc.y, new_loc.z], [0]]
                else:
                    args = [[numpy.random.randint(0, size) for size in arg.sizes]
                            for arg in self.action_spec.functions[function_id].args]
                selected_actions.append(
                    actions.FunctionCall(player_id, function_id, args))

            # now add team-wide functions (we use pid = 0)
            if len(obs.observation['available_actions'][0]) > 0:
                function_id = numpy.random.choice(
                    obs.observation['available_actions'][0])
                print('RandomAgent chose random action: %d for the team' % function_id)
                args = [[numpy.random.randint(0, size) for size in arg.sizes]
                        for arg in self.action_spec.functions[function_id].args]
                selected_actions.append(
                    actions.FunctionCall(0, function_id, args))

    # print("RandomAgent selected actions:", selected_actions)
    return selected_actions
def step(self, obs):
    self.steps += 1
    self.reward += obs.reward
    return actions.FunctionCall(-1, 0, [])
def step(self, obs, world_state):
    super(MoveAgent, self).step(obs)
    # if self.steps >= 300:
    #     self.qlearn.dump_table()
    #     self._state = environment.StepType.LAST
    if not world_state:
        return []

    pids = world_state.get_player_ids()
    if len(pids) < 5:
        return []

    selected_actions = []
    for pid in pids:
        player = world_state.get_player_by_id(pid)
        player_loc = player.get_location()
        dist_to_loc = player_loc.dist(self.dest_loc)

        # initialize our previous variables on the first valid step
        if pid not in self.previous_dist:
            self.previous_dist[pid] = dist_to_loc
            self.previous_action[pid] = None
            self.previous_state[pid] = None

        # desired facing, discretized into 8 buckets of 45 degrees each
        # (a compact bucket_facing() sketch of this discretization follows this method)
        loc_delta = self.dest_loc - player_loc
        desired_degree_facing = math.degrees(
            math.atan2(loc_delta.y, loc_delta.x))
        if desired_degree_facing < 0:
            # atan2 returns degrees in (-180, 180]; shift into [0, 360) before bucketing
            desired_degree_facing += 360.0
        if desired_degree_facing < 22.5 or desired_degree_facing >= (360.0 - 22.5):
            desired_degree_facing = int(0)
        elif 22.5 <= desired_degree_facing < (45.0 + 22.5):
            desired_degree_facing = int(45)
        elif (90.0 - 22.5) <= desired_degree_facing < (90.0 + 22.5):
            desired_degree_facing = int(90)
        elif (135.0 - 22.5) <= desired_degree_facing < (135.0 + 22.5):
            desired_degree_facing = int(135)
        elif (180.0 - 22.5) <= desired_degree_facing < (180.0 + 22.5):
            desired_degree_facing = int(180)
        elif (225.0 - 22.5) <= desired_degree_facing < (225.0 + 22.5):
            desired_degree_facing = int(225)
        elif (270.0 - 22.5) <= desired_degree_facing < (270.0 + 22.5):
            desired_degree_facing = int(270)
        elif (315.0 - 22.5) <= desired_degree_facing < (315.0 + 22.5):
            desired_degree_facing = int(315)
        else:
            raise Exception("Bad Desired Angle: %f" % desired_degree_facing)

        # discretize our location to a square cell (_CELL_GRID_SIZE units wide and tall)
        x_grid = int(player_loc.x / _CELL_GRID_SIZE)
        y_grid = int(player_loc.y / _CELL_GRID_SIZE)

        # estimated state space size: 8 * (14000 x 14000) / (_CELL_GRID_SIZE * _CELL_GRID_SIZE)
        # example: 156,800 with _CELL_GRID_SIZE == 100.0
        current_state = np.zeros(3)
        current_state[0] = x_grid  # (14,000 / _CELL_GRID_SIZE) x_grid values
        current_state[1] = y_grid  # (14,000 / _CELL_GRID_SIZE) y_grid values
        current_state[2] = desired_degree_facing  # 8 facing values
        # with 156,800 states and 11 possible actions we estimate our full
        # model contains 1,724,800 state-action nodes

        # if we previously took an action, evaluate its reward
        if self.previous_action[pid] is not None:
            reward = 0
            if dist_to_loc < 50:
                reward += ARRIVED_AT_LOCATION_REWARD
                self._state = environment.StepType.LAST
            elif dist_to_loc < self.previous_dist[pid]:
                reward += TIME_STEP_CLOSER_REWARD
            elif dist_to_loc == self.previous_dist[pid]:
                reward += TIME_STEP_REWARD
            else:
                reward += DEFAULT_REWARD

            # update our learning model with the reward for that action
            print("From State '%s' took Action '%s' and got '%f' reward arriving at new_state '%s'" %
                  (self.previous_state[pid], self.previous_action[pid], reward, current_state))
            print("Prev Dist was '%f', New Dist is '%f'" %
                  (self.previous_dist[pid], dist_to_loc))
            self.qlearn.learn(str(self.previous_state[pid]),
                              self.previous_action[pid], reward,
                              str(current_state))

        # choose an action to take given our learning model
        rl_action = self.qlearn.choose_action(str(current_state))
        smart_action = smart_actions[rl_action]

        self.previous_dist[pid] = dist_to_loc
        self.previous_state[pid] = current_state
        self.previous_action[pid] = rl_action

        degrees = 0
        if '_' in smart_action:
            smart_action, degrees = smart_action.split('_')
            degrees = int(degrees)

        if smart_action == ACTION_DO_NOTHING:
            selected_actions.append(
                actions.FunctionCall(pid, _HERO_NO_OP, []))
        elif smart_action == ACTION_CLEAR_ACTION:
            selected_actions.append(
                actions.FunctionCall(pid, _HERO_CLEAR_ACTION, [[0]]))
        elif smart_action == ACTION_CLEAR_ACTION_STOP:
            selected_actions.append(
                actions.FunctionCall(pid, _HERO_CLEAR_ACTION, [[1]]))
        elif smart_action == ACTION_MOVE:
            if _HERO_MOVE_TO_LOCATION in obs.observation["available_actions"][pid]:
                selected_actions.append(
                    actions.FunctionCall(pid, _HERO_MOVE_TO_LOCATION, [
                        player.max_reachable_location(degrees), _NOT_QUEUED
                    ]))
            else:
                selected_actions.append(
                    actions.FunctionCall(pid, _HERO_NO_OP, []))

    return selected_actions
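# Hypothetical helper, not part of the agent above: a minimal sketch of the same
# 8-way facing discretization used in MoveAgent.step, assuming the input angle is
# already normalized to [0, 360). Each 45-degree bucket is centered on 0, 45, ..., 315.
def bucket_facing(degrees):
    return int(((degrees + 22.5) % 360.0) // 45.0) * 45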
def func_call(func_id, args):
    # Wrap a function id and its argument lists in a FunctionCall,
    # coercing every argument value to an int.
    return actions.FunctionCall(func_id, [[int(v) for v in a] for a in args])
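# Usage sketch for func_call; the function id and argument values below are
# made-up examples, not real action ids. Float values are truncated to ints:
#   func_call(4, [[100.0, 250.5, 0.0], [0]])
#   -> actions.FunctionCall(4, [[100, 250, 0], [0]])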