def run_episode(AI, board_size: int = 21, max_turns: int = 400, starting_halite: int = 5000, agent_count: int = 4):
    """
    Runs a complete episode with a given AI and game settings.

    :param AI: The AI function (or built-in agent name) to run for every player
    :param board_size: The size of the board to use
    :param max_turns: The turn at which the scores will be totaled & the winner decided
    :param starting_halite: The amount of Halite that each agent starts with
    :param agent_count: How many agents should be simulated
    :return: The final scores (rewards) of each player
    """
    environment = make("halite", configuration={
        "size": board_size,
        "startingHalite": starting_halite,
        # Bug fix: max_turns was accepted but never used; forward it as
        # episodeSteps so the episode actually ends at the requested turn.
        "episodeSteps": max_turns
    })
    environment.reset(num_agents=agent_count)

    # Bug fix: the original ran two hard-coded "random" agents, ignoring both
    # the AI argument and agent_count. Run the supplied AI for every player.
    steps = environment.run([AI] * agent_count)

    # Persist a replay for visual inspection (context manager guarantees close).
    out = environment.render(mode="html", width=500, height=450)
    with open("halite.html", "w") as f:
        f.write(out)

    # Each step is a list of per-agent states; the last step carries the
    # final rewards (scores) promised by the docstring.
    return [agent.reward for agent in steps[-1]]
def sac_halite_agent(state, config):
    """Kaggle agent wrapper: queries the SAC policy for the single ship's move.

    Builds the observation dict expected by the policy, digitizes the policy
    output into a discrete direction index, and assigns that direction to the
    player's first ship (if any).
    """
    from collections import OrderedDict

    game_board = hh.Board(state, config)
    player = game_board.current_player

    # Observation layout must match the training environment's spaces.Dict.
    obs = OrderedDict({
        'feature_maps': get_feature_maps(game_board),
        'scalar_features': get_scalar_features(game_board),
    })

    raw_action = policy.action(obs).numpy()
    move_index = digitize_action(raw_action)

    moves = [hh.ShipAction.NORTH, hh.ShipAction.SOUTH,
             hh.ShipAction.WEST, hh.ShipAction.EAST]
    try:
        player.ships[0].next_action = moves[move_index]
    except IndexError:
        # Either the index means "stay / collect" or the player has no ships;
        # in both cases no action is queued.
        pass

    return player.next_actions
def run(self, observation, configuration):
    """
    Queues next actions for each ship & shipyard and returns them.

    :param observation: Raw observation from the environment
    :param configuration: Raw environment configuration
    :return: The set of player actions which can be passed to environment.run
    """
    board = helpers.Board(raw_observation=observation, raw_configuration=configuration)
    current_player = board.current_player
    for ship in current_player.ships:
        ship.next_action = self.ship_ai(board, ship.id)
    for shipyard in current_player.shipyards:
        # Bug fix: the original passed ship.id here, which reused the last
        # ship's id for every shipyard (and raised NameError when the player
        # had no ships at all).
        shipyard.next_action = self.shipyard_ai(board, shipyard.id)
    return current_player.next_actions
def __init__(self, is_action_continuous=False, debug=False):
    """Gym-style single-ship Halite environment.

    :param is_action_continuous: if True, actions come from a 1-D Box in
        [-1, 1] that is digitized later; otherwise a Discrete(5) space
        (four moves + idleness) is used.
    :param debug: enables verbose board printing in step()/reset()
    """
    self._current_ship = None
    self._is_debug = debug
    self._is_act_continuous = is_action_continuous
    self._board_size = 5
    self._starting_halite = 5000

    self._env = make('halite',
                     configuration={"size": self._board_size,
                                    "startingHalite": self._starting_halite},
                     debug=True)
    self._trainer = self._env.train([None])

    # Probe one reset to discover the shapes the feature extractors produce.
    first_obs = self._trainer.reset()
    probe_board = hh.Board(first_obs, self._env.configuration)
    scalars_shape = get_scalar_features(probe_board).shape
    maps_shape = get_feature_maps(probe_board).shape

    # Four sides for movements plus idleness.
    if self._is_act_continuous:
        self.action_space = spaces.Box(low=-1, high=1, shape=(1, ), dtype=np.float32)
    else:
        self.action_space = spaces.Discrete(5)

    # For the simplest case (one ship, no shipyards/enemies) the maps carry
    # the halite field; each cell has no more than 500 halite, rescaled to
    # 0-1 by the extractors — hence the [0, 1] Box bounds.
    self.observation_space = spaces.Dict(OrderedDict({
        "feature_maps": spaces.Box(low=0, high=1,
                                   shape=maps_shape, dtype=np.float32),
        # (x, y, ship's halite, overall halite, time)
        "scalar_features": spaces.Box(low=0, high=1,
                                      shape=scalars_shape, dtype=np.float32),
    }))
def step(self, action):
    """Apply one action for ship '0-1' and advance the environment one turn.

    :param action: a continuous value (digitized to an index) when the env
        was built with is_action_continuous, otherwise a discrete action index
    :return: gym-style tuple (observation dict, reward, done flag, info)
    """
    if self._is_act_continuous:
        action_number = digitize_action(action)
    else:
        action_number = action

    actions = {}
    try:
        actions['0-1'] = ACTION_NAMES[action_number]
    except KeyError:
        # An index outside ACTION_NAMES means "stay / collect":
        # send no ship action at all.
        pass

    obs, _, done, info = self._trainer.step(actions)
    next_board = hh.Board(obs, self._env.configuration)
    scalar_features = get_scalar_features(next_board)
    feature_maps = get_feature_maps(next_board)
    state = OrderedDict({
        "feature_maps": feature_maps,
        "scalar_features": scalar_features
    })

    # We pass next_board and current_ship to find this ship on the next
    # board and calculate a reward.
    reward = get_ship_reward(next_board, self._current_ship)

    # Bug fix: the ship may have been destroyed or converted, in which case
    # the original unguarded lookup raised KeyError (a case the debug branch
    # below already anticipated). Keep the last known reference instead.
    try:
        self._current_ship = next_board.ships['0-1']
    except KeyError:
        pass

    if self._is_debug:
        print(next_board)
        try:
            print(f"Action: {ACTION_NAMES[action_number]}")
        except KeyError:
            print("Collection / staying")
        try:
            halite_new = next_board.ships['0-1'].halite
        except KeyError:
            # Typo fix in the debug message: "of" -> "or".
            halite_new = 'no (it was destroyed or converted)'
        print(f"ship has {halite_new} halite")
        print(f"ship reward is {reward}")
        print(f"coordinates are {scalar_features}")

    return state, reward, done, info
def reset(self):
    """Start a fresh episode and return the initial observation dict.

    A brand-new environment is created on every reset so the agent can
    study a new halite layout each episode.
    """
    self._env = make('halite',
                     configuration={"size": self._board_size,
                                    "startingHalite": self._starting_halite},
                     debug=True)
    self._trainer = self._env.train([None])

    initial_obs = self._trainer.reset()
    start_board = hh.Board(initial_obs, self._env.configuration)
    self._current_ship = start_board.ships['0-1']

    if self._is_debug:
        print(start_board)

    return OrderedDict({
        "feature_maps": get_feature_maps(start_board),
        "scalar_features": get_scalar_features(start_board)
    })