Code Example #1
from kaggle_environments import make
from kaggle_environments.envs.halite import helpers


def run_episode(AI,
                board_size: int = 21,
                max_turns: int = 400,
                starting_halite: int = 5000,
                agent_count: int = 4):
    """
    Runs a complete episode with a given AI and game settings.
    :param AI: The AI function (or built-in agent name) to play as player 0
    :param board_size: The size of the board to use
    :param max_turns: The turn at which the scores are totaled and the winner decided
    :param starting_halite: The amount of halite that each agent starts with
    :param agent_count: How many agents should be simulated
    :return: The final reward of each player
    """
    environment = make("halite",
                       configuration={
                           "size": board_size,
                           "episodeSteps": max_turns,
                           "startingHalite": starting_halite
                       })
    environment.reset(num_agents=agent_count)
    # The initial board can be inspected here if needed for debugging.
    state = environment.state[0]
    board = helpers.Board(raw_observation=state.observation,
                          raw_configuration=environment.configuration)

    # Play the given AI against random built-in opponents in the other seats.
    environment.run([AI] + ["random"] * (agent_count - 1))
    out = environment.render(mode="html", width=500, height=450)

    with open("halite.html", "w") as f:
        f.write(out)

    return [agent.reward for agent in environment.state]
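
A quick way to exercise this helper (a sketch, assuming the imports above): "random" is a built-in agent that kaggle_environments accepts by name, so it can stand in for a real AI function.

# Pit the built-in random agent against three random opponents;
# the replay is written to halite.html next to the script.
final_scores = run_episode(AI="random")
print(final_scores)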
Code Example #2
File: halite_main.py  Project: limash/softlearning
    def sac_halite_agent(state, config):
        # hh (the halite helpers module), policy, get_scalar_features,
        # get_feature_maps and digitize_action come from the enclosing
        # module (halite_main.py).
        from collections import OrderedDict

        directions = [hh.ShipAction.NORTH,
                      hh.ShipAction.SOUTH,
                      hh.ShipAction.WEST,
                      hh.ShipAction.EAST]

        board = hh.Board(state, config)
        me = board.current_player

        scalar_features = get_scalar_features(board)
        # scalar_features = scalar_features[np.newaxis, ...]
        # scalar_features = tf.convert_to_tensor(scalar_features, dtype=tf.float32)
        feature_maps = get_feature_maps(board)
        # feature_maps = feature_maps[np.newaxis, ...]
        # feature_maps = tf.convert_to_tensor(feature_maps, dtype=tf.float32)
        observation = OrderedDict({'feature_maps': feature_maps,
                                   'scalar_features': scalar_features})

        action_step = policy.action(observation).numpy()
        action_number = digitize_action(action_step)
        # action_number = action_step.action.numpy()[0]
        try:
            me.ships[0].next_action = directions[action_number]
        except IndexError:
            # index 4 falls outside directions: leave next_action unset,
            # so the ship stays put and collects halite
            pass
        return me.next_actions
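
The agent calls a project-specific digitize_action helper that is not shown here. A minimal sketch of what such a mapping might look like, assuming the policy emits a continuous action in [-1, 1] that must be bucketed into five discrete choices (four moves plus index 4 for staying put); the name and behavior are assumptions, not the project's actual code:

import numpy as np

def digitize_action(action, n_actions=5):
    # Hypothetical stand-in: split [-1, 1] into n_actions equal bins
    # and return the bin index; indices 0-3 pick a direction and
    # index 4 falls outside `directions`, meaning "stay".
    edges = np.linspace(-1, 1, n_actions + 1)[1:-1]
    return int(np.digitize(np.asarray(action).ravel()[0], edges))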
Code Example #3
    def run(self, observation, configuration):
        """
        Queues next actions for each ship & shipyard and returns them.
        :param observation: The raw observation from the environment
        :param configuration: The raw environment configuration
        :return: The set of player actions which can be passed to environment.run
        """
        board = helpers.Board(raw_observation=observation,
                              raw_configuration=configuration)
        current_player = board.current_player
        for ship in current_player.ships:
            ship.next_action = self.ship_ai(board, ship.id)
        for shipyard in current_player.shipyards:
            shipyard.next_action = self.shipyard_ai(board, shipyard.id)

        return current_player.next_actions
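
kaggle_environments expects an agent to be a plain callable taking (observation, configuration), so a controller exposing this run method can be plugged in with a thin wrapper. A sketch, assuming a hypothetical HaliteController class that owns the ship_ai and shipyard_ai callbacks:

from kaggle_environments import make

controller = HaliteController()  # hypothetical class holding run()

def controller_agent(observation, configuration):
    # adapt the method to the (observation, configuration) agent signature
    return controller.run(observation, configuration)

environment = make("halite")
environment.run([controller_agent, "random"])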
Code Example #4
    def __init__(self, is_action_continuous=False, debug=False):
        self._current_ship = None
        self._is_debug = debug
        self._is_act_continuous = is_action_continuous
        # self._episode_ended = False
        self._board_size = 5
        self._starting_halite = 5000
        self._env = make('halite',
                         configuration={
                             "size": self._board_size,
                             "startingHalite": self._starting_halite
                         },
                         debug=True)
        self._trainer = self._env.train([None])
        obs = self._trainer.reset()

        board = hh.Board(obs, self._env.configuration)
        scalar_features_size = get_scalar_features(board).shape
        feature_maps_size = get_feature_maps(board).shape

        # four sides for movements plus idleness
        if self._is_act_continuous:
            self.action_space = spaces.Box(low=-1,
                                           high=1,
                                           shape=(1, ),
                                           dtype=np.float32)
        else:
            self.action_space = spaces.Discrete(5)

        self.observation_space = spaces.Dict(
            OrderedDict(
                # for the simplest case with only one ship, no shipyards,
                # and no enemies there is only a halite map of size
                # board_size x board_size; each cell has no more than
                # 500 halite, rescaled to the range 0-1
                {
                    "feature_maps":
                    spaces.Box(low=0,
                               high=1,
                               shape=feature_maps_size,
                               dtype=np.float32),
                    # (x, y, ship's halite, overall halite, time)
                    "scalar_features":
                    spaces.Box(low=0,
                               high=1,
                               shape=scalar_features_size,
                               dtype=np.float32)
                }))
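
With these spaces defined the object can be used like any Gym environment. A quick sanity check, assuming the class above is called HaliteEnv (the class name is not shown in the excerpt):

env = HaliteEnv(is_action_continuous=False)
print(env.action_space)                             # Discrete(5)
print(env.observation_space["feature_maps"].shape)
action = env.action_space.sample()                  # an int in 0..4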
Code Example #5
    def step(self, action):
        if self._is_act_continuous:
            action_number = digitize_action(action)
        else:
            action_number = action

        actions = {}
        try:
            actions['0-1'] = ACTION_NAMES[action_number]
        except KeyError:
            pass

        obs, _, done, info = self._trainer.step(actions)
        next_board = hh.Board(obs, self._env.configuration)
        scalar_features = get_scalar_features(next_board)
        feature_maps = get_feature_maps(next_board)
        state = OrderedDict({
            "feature_maps": feature_maps,
            "scalar_features": scalar_features
        })

        # we pass next_board and the current ship to find this ship on the
        # next board and calculate a reward
        reward = get_ship_reward(next_board, self._current_ship)
        # assumes the single ship '0-1' still exists on the next board
        self._current_ship = next_board.ships['0-1']

        if self._is_debug:
            print(next_board)
            try:
                print(f"Action: {ACTION_NAMES[action_number]}")
            except KeyError:
                print("Collection / staying")
            # print(f"halite = {next_board.current_player.halite}")
            try:
                halite_new = next_board.ships['0-1'].halite
            except KeyError:
                halite_new = 'no (it was destroyed or converted)'
            print(f"ship has {halite_new} halite")
            print(f"ship reward is {reward}")
            print(f"coordinates are {scalar_features}")

        # if done:
        #     self._episode_ended = True
        return state, reward, done, info
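
step relies on a module-level ACTION_NAMES mapping that is not shown. Judging by the KeyError handling, it likely contains only the four movement actions, so index 4 produces an empty actions dict and the ship stays to mine. A sketch consistent with that behavior (an assumption, not the project's actual definition):

# Hypothetical mapping: only the four moves are present, so looking up
# index 4 raises KeyError and no action is queued for the ship.
ACTION_NAMES = {0: "NORTH", 1: "SOUTH", 2: "WEST", 3: "EAST"}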
Code Example #6
    def reset(self):
        # returns the initial observation
        # make a new environment so the agent can learn a new halite layout
        self._env = make('halite',
                         configuration={
                             "size": self._board_size,
                             "startingHalite": self._starting_halite
                         },
                         debug=True)
        self._trainer = self._env.train([None])
        obs = self._trainer.reset()
        board = hh.Board(obs, self._env.configuration)
        self._current_ship = board.ships['0-1']
        if self._is_debug:
            print(board)
        scalar_features = get_scalar_features(board)
        feature_maps = get_feature_maps(board)
        # if self._feature_maps_size_two:
        #     feature_maps = feature_maps[:, np.newaxis]
        # self._episode_ended = False
        return OrderedDict({
            "feature_maps": feature_maps,
            "scalar_features": scalar_features
        })
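
Putting reset and step together, a random rollout against the trainer could look like the sketch below, again assuming the wrapper class is called HaliteEnv and that the single ship '0-1' survives the episode:

env = HaliteEnv(is_action_continuous=False, debug=False)
state = env.reset()
done, episode_return = False, 0.0
while not done:
    # sample one of the five discrete actions uniformly at random
    action = env.action_space.sample()
    state, reward, done, info = env.step(action)
    episode_return += reward
print(f"episode return: {episode_return}")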