Ejemplo n.º 1
0
 def update_env_after_move(board: Board, move: Move, north_moved):
     """Apply ``move`` to ``board`` in place and return the side that plays next.

     Steps, in order: validate the move, handle the index-0 move by swapping
     the sides, sow the seeds of the chosen hole, perform a capture if the
     last seed qualifies, collect the leftover seeds if the game is finished,
     and finally decide whether the mover gets another turn.

     :param board: board to mutate.
     :param move: move to apply; ``move.side`` is the mover and ``move.index``
         the chosen hole (``0`` triggers the side swap instead of sowing).
     :param north_moved: forwarded to ``KalahEnvironment.is_permitted`` and
         used for the extra-turn decision at the end.
     :return: ``move.side`` when the mover plays again, otherwise the
         opposite side.
     :raises Exception: if ``KalahEnvironment.is_permitted`` rejects the move.
     """
     if not KalahEnvironment.is_permitted(board, move, north_moved):
         raise Exception('Move not permitted')
     # Index 0 does not sow anything: the sides are swapped and the turn passes.
     # NOTE(review): presumably this is the "pie" (swap) move -- confirm.
     if move.index == 0:
         KalahEnvironment.swap_sides(board)
         return Side.opposite(move.side)
     # Pick up every seed from the chosen hole.
     seeds_to_sow = board.get_seeds(move.side, move.index)
     board.set_seeds(move.side, move.index, 0)
     holes = board.holes
     # One lap visits every hole on both sides plus the mover's own store.
     receiving_holes = 2 * holes + 1
     rounds = seeds_to_sow // receiving_holes
     remaining_seeds = seeds_to_sow % receiving_holes
     # Complete laps are applied in bulk: every hole (including the one just
     # emptied) and the mover's store receive ``rounds`` seeds each.
     if rounds != 0:
         for hole in range(1, holes + 1):
             board.add_seeds(Side.NORTH, hole, rounds)
             board.add_seeds(Side.SOUTH, hole, rounds)
         board.add_seeds_to_store(move.side, rounds)
     # The seeds of the incomplete lap are sown one at a time, starting right
     # after the chosen hole. ``sow_hole == 0`` is the sentinel for "the last
     # seed went into the mover's store".
     sow_side = move.side
     sow_hole = move.index
     for _ in range(remaining_seeds):
         sow_hole += 1
         # Stepping from the store sentinel (0) to hole 1 crosses to the other side.
         if sow_hole == 1:
             sow_side = Side.opposite(sow_side)
         if sow_hole > holes:
             if sow_side == move.side:
                 # Past the mover's last hole: the seed goes into the mover's store.
                 sow_hole = 0
                 board.add_seeds_to_store(sow_side, 1)
                 continue
             else:
                 # Past the opponent's last hole: no seed is added to the opponent's
                 # store; sowing continues at hole 1 of the other side.
                 sow_side = Side.opposite(sow_side)
                 sow_hole = 1
         board.add_seeds(sow_side, sow_hole, 1)
     # Capture: the last seed ended in a hole on the mover's side that now holds
     # exactly one seed while the opposite hole is non-empty. Both holes are
     # emptied into the mover's store (the opposite seeds plus the sown one).
     if sow_side == move.side and sow_hole > 0 and board.get_seeds(sow_side, sow_hole) == 1 \
             and board.get_seeds_op(sow_side, sow_hole) > 0:
         board.add_seeds_to_store(move.side, 1 + board.get_seeds_op(sow_side, sow_hole))
         board.set_seeds(move.side, sow_hole, 0)
         board.set_seeds_op(move.side, sow_hole, 0)
     game_over = KalahEnvironment.game_finished(board)
     if game_over:
         # NORTH is checked first; if it still has seeds, SOUTH is taken to be the
         # side without seeds. The other side moves everything left in its holes
         # into its own store.
         finished_side = Side.NORTH if KalahEnvironment.side_has_no_seeds(board, Side.NORTH) else Side.SOUTH
         seeds = 0
         collecting_side = Side.opposite(finished_side)
         for hole in range(1, board.holes + 1):
             seeds += board.get_seeds(collecting_side, hole)
             board.set_seeds(collecting_side, hole, 0)
         board.add_seeds_to_store(collecting_side, seeds)
     # Extra turn when the last seed landed in the mover's store, except for a
     # SOUTH move made while ``north_moved`` is still falsy.
     # NOTE(review): presumably this denies the extra turn on the opening move so
     # that NORTH can answer it with the swap -- confirm against the rules used.
     if sow_hole == 0 and (move.side == Side.NORTH or north_moved):
         return move.side
     return Side.opposite(move.side)
Ejemplo n.º 2
0
def h1(state: KalahEnvironment, side: Side) -> float:
    """Heuristic value of ``state`` for ``side``: its store lead over the opponent.

    :param state: environment whose ``board`` is evaluated.
    :param side: side from whose point of view the board is scored.
    :return: seeds in the store of ``side`` minus seeds in the store of the
        opposite side (positive when ``side`` is ahead).
    """
    return (state.board.get_seeds_in_store(side)
            - state.board.get_seeds_in_store(Side.opposite(side)))
Ejemplo n.º 3
0
    def set_seeds_op(self, side: Side, hole: int, seeds: int):
        """Set the seed count of the hole facing ``hole`` on the opposite side.

        The target is the entry at index ``self.holes + 1 - hole`` in the row
        of the side opposite to ``side``.

        :param side: side whose hole number is given.
        :param hole: hole number on ``side``, from 1 to ``self.holes``.
        :param seeds: new seed count for the facing hole; must not be negative.
        :raises ValueError: if ``hole`` is out of range or ``seeds`` is negative.
        """
        hole_out_of_range = hole < 1 or hole > self.holes
        if hole_out_of_range:
            raise ValueError('Hole number must be between 1 and number of holes')
        if seeds < 0:
            raise ValueError('There has to be a non-negative number of seeds')

        opposite_row = self.board[Side.get_index(Side.opposite(side))]
        mirrored_hole = self.holes + 1 - hole
        opposite_row[mirrored_hole] = seeds
Ejemplo n.º 4
0
 def get_winner(self) -> 'Side | None':
     """Return the winning side of a finished game, or ``None`` on a tie.

     The side whose holes are empty (NORTH is checked first, otherwise SOUTH
     is assumed) is scored by its store alone. The other side is scored by
     its store plus every seed still lying in its holes.

     :return: the side with the larger total, or ``None`` when both totals
         are equal.
     :raises Exception: if ``has_game_ended()`` reports that the game is
         still running.
     """
     if not self.has_game_ended():
         raise Exception('Game has not ended')
     board = self.board
     finished_side = Side.NORTH if KalahEnvironment.side_has_no_seeds(board, Side.NORTH) else Side.SOUTH
     collecting_side = Side.opposite(finished_side)
     # Seeds left in the collecting side's holes count towards its total.
     collecting_total = board.get_seeds_in_store(collecting_side) + sum(
         board.get_seeds(collecting_side, hole)
         for hole in range(1, board.holes + 1))
     finished_total = board.get_seeds_in_store(finished_side)
     if finished_total > collecting_total:
         return finished_side
     if finished_total < collecting_total:
         return collecting_side
     return None
Ejemplo n.º 5
0
def env_runner(env, trainer_side, ac_net, opp_agent):
    """Play ``env`` until the game has ended and return the trainer's rollout.

    On each turn with more than one valid move, the side to play picks an
    action: the trainer samples it from ``ac_net``, the opponent asks
    ``opp_agent``. Turns with exactly one valid move are played
    automatically and are not recorded. Only the trainer's sampled steps are
    added to the rollout.

    :param env: game environment; it is mutated by the moves played here.
    :param trainer_side: side controlled by ``ac_net``.
    :param ac_net: policy/value network; ``sample(state, mask)`` must return
        an ``(action, value)`` pair.
    :param opp_agent: opponent; ``produce_action(board_image, mask, side)``
        must return an action.
    :return: the rollout of the trainer's steps. Its last reward is replaced
        by the final score difference, and a win is recorded when
        ``env.get_winner()`` equals ``trainer_side``.
    """
    rollout = Rollout()

    while not env.has_game_ended():
        # There is no choice if only one action is left. Taking that action automatically must be seen as
        # a characteristic behaviour of the environment. This helped the learning of the agent
        # to be more numerically stable (this is an empirical observation).
        valid_moves = env.get_valid_moves()
        if len(valid_moves) == 1:
            env.do_move(valid_moves[0])
            continue

        if env.side_to_play == trainer_side:
            # If the agent is playing as NORTH, its input would be a flipped board
            flip_board = env.side_to_play == Side.NORTH
            state = env.board.get_board_image(flipped=flip_board)
            mask = env.get_mask()

            action, value = ac_net.sample(state, mask)
            # Because the pie move with index 0 is ignored, the action indexes must be shifted by one
            reward = env.do_move(Move(trainer_side, action + 1))
            rollout.add(state, action, reward, value, mask)
        else:
            assert env.side_to_play == Side.opposite(trainer_side)
            action = opp_agent.produce_action(env.board.get_board_image(),
                                              env.get_mask(), env.side_to_play)
            env.do_move(Move(env.side_to_play, action + 1))

    # We replace the partial reward of the last move with the final reward of the game
    final_reward = env.calculate_score_diff(trainer_side)
    rollout.update_last_reward(final_reward)

    if env.get_winner() == trainer_side:
        rollout.add_win()
    return rollout
Ejemplo n.º 6
0
 def calculate_score_diff(self, side: Side):
     """Return the store of ``side`` minus the store of the opposite side.

     :param side: side from whose point of view the difference is taken.
     :return: positive when ``side`` has more seeds in its store, negative
         when the opposite side does, zero when they are level.
     """
     own_store = self.board.get_seeds_in_store(side)
     rival_store = self.board.get_seeds_in_store(Side.opposite(side))
     return own_store - rival_store
Ejemplo n.º 7
0
 def do_move(self, move: Move):
     """Play ``move`` on this environment's board and update the turn state.

     A move with index 0 first flips ``self.my_side``. The board update is
     delegated to ``KalahEnvironment.update_env_after_move``, whose result
     becomes ``self.side_to_play``.

     :param move: move to play.
     """
     swap_requested = move.index == 0
     if swap_requested:
         self.my_side = Side.opposite(self.my_side)
     # The board update must see ``north_has_moved`` as it was *before* this
     # move, so the flag is only raised afterwards.
     next_to_play = KalahEnvironment.update_env_after_move(self.board, move, self.north_has_moved)
     self.side_to_play = next_to_play
     north_just_moved = move.side == Side.NORTH
     if north_just_moved:
         self.north_has_moved = True
Ejemplo n.º 8
0
 def get_seeds_op(self, side: Side, hole: int):
     """Return the seed count of the hole facing ``hole`` on the opposite side.

     The value is read from index ``self.holes + 1 - hole`` in the row of
     the side opposite to ``side``.

     :param side: side whose hole number is given.
     :param hole: hole number on ``side``, from 1 to ``self.holes``.
     :return: the number of seeds stored for the facing hole.
     :raises ValueError: if ``hole`` is out of range.
     """
     hole_out_of_range = hole < 1 or hole > self.holes
     if hole_out_of_range:
         raise ValueError('Hole number must be between 1 and number of holes')
     opposite_row = self.board[Side.get_index(Side.opposite(side))]
     mirrored_hole = self.holes + 1 - hole
     return opposite_row[mirrored_hole]