Esempio n. 1
0
def _scoring_store_diff(state: MancalaEnv, parent_side: Side) -> int:
    """Score a state as the seed-count gap between the two stores.

    Positive when *parent_side*'s store holds more seeds than the
    opponent's, negative otherwise.
    """
    opponent = Side.opposite(parent_side)
    return (state.board.get_seeds_in_store(parent_side)
            - state.board.get_seeds_in_store(opponent))
Esempio n. 2
0
    def set_seeds_op(self, side: Side, hole: int, seeds: int):
        """Set the seed count of the hole opposite *hole* on the other side.

        :raises ValueError: if *hole* is outside ``1..self.holes`` or
            *seeds* is negative.
        """
        if not 1 <= hole <= self.holes:
            raise ValueError(
                'Hole number must be between 1 and number of holes')
        if seeds < 0:
            raise ValueError('There has to be a non-negative number of seeds')

        # Hole i on one side faces hole (holes + 1 - i) on the other.
        opposite_row = Side.get_index(Side.opposite(side))
        self.board[opposite_row][self.holes + 1 - hole] = seeds
Esempio n. 3
0
    def perform_move(self, move: Move) -> float:
        """Perform a move and return a partial reward for it.

        The reward is the number of seeds the move added to the mover's
        store, scaled by 1/100 — so it is a float. (The previous ``-> int``
        annotation was wrong: ``/ 100.0`` always yields a float.)

        :param move: the Move to apply; index 0 is the pie move (side swap).
        :return: (seeds gained in the mover's store) / 100.0
        """
        seeds_in_store_before = self.board.get_seeds_in_store(move.side)
        if move.index == 0:  # pie move: our side flips before the move is made
            self.our_side = Side.opposite(self.our_side)
        self.side_to_move = MancalaEnv.make_move(self.board, move,
                                                 self.north_moved)
        # Track that NORTH has taken its first move (gates the pie rule).
        if move.side == Side.NORTH:
            self.north_moved = True
        seeds_in_store_after = self.board.get_seeds_in_store(move.side)

        # Return a partial reward proportional to the number of captured seeds.
        return (seeds_in_store_after - seeds_in_store_before) / 100.0
Esempio n. 4
0
    def _run(self) -> Rollout:
        """Play one full self-play game and return the collected Rollout.

        The trainer's side is picked uniformly at random; the opponent
        agent plays the other side. Only the trainer's transitions
        (state, action, reward, value, mask) are recorded. The last
        recorded reward is replaced by the final game reward afterwards.
        """
        # Choose randomly the side to play
        self.trainer_side = Side.SOUTH if random.randint(
            0, 1) == 0 else Side.NORTH
        # Reset the environment so everything is in a clean state.
        self.env.reset()

        rollout = Rollout()
        while not self.env.is_game_over():
            # There is no choice if only one action is left. Taking that action automatically must be seen as
            # a characteristic behaviour of the environment. This helped the learning of the agent
            # to be more numerically stable (this is an empirical observation).
            # NOTE: such forced moves are NOT added to the rollout, even for
            # the trainer's side.
            if len(self.env.get_legal_moves()) == 1:
                action_left_to_perform = self.env.get_legal_moves()[0]
                self.env.perform_move(action_left_to_perform)
                continue

            if self.env.side_to_move == self.trainer_side:
                # If the agent is playing as NORTH, it's input would be a flipped board
                flip_board = self.env.side_to_move == Side.NORTH
                state = self.env.board.get_board_image(flipped=flip_board)
                mask = self.env.get_action_mask_with_no_pie()

                action, value = self.ac_net.sample(state, mask)
                # Because the pie move with index 0 is ignored, the action indexes must be shifted by one
                reward = self.env.perform_move(
                    Move(self.trainer_side, action + 1))
                rollout.add(state, action, reward, value, mask)
            else:
                assert self.env.side_to_move == Side.opposite(
                    self.trainer_side)
                # The opponent agent sees the unflipped board image.
                action = self.opp_agent.produce_action(
                    self.env.board.get_board_image(),
                    self.env.get_action_mask_with_no_pie(),
                    self.env.side_to_move)
                self.env.perform_move(Move(self.env.side_to_move, action + 1))

        # We replace the partial reward of the last move with the final reward of the game
        final_reward = self.env.compute_final_reward(self.trainer_side)
        rollout.update_last_reward(final_reward)

        if self.env.get_winner() == self.trainer_side:
            rollout.add_win()
        return rollout
Esempio n. 5
0
    def get_winner(self) -> 'Side | None':
        """Return the winning Side of the game, or None on a tie.

        The previous annotation ``Side or None`` evaluated to just
        ``Side`` at definition time; the string union annotation states
        the actual contract without needing an import.

        :raises ValueError: if the game is not over yet.
        """
        if not self.is_game_over():
            raise ValueError(
                'This method should be called only when the game is over')

        # The side whose holes are all empty ended the game.
        finished_side = Side.NORTH if MancalaEnv.holes_empty(
            self.board, Side.NORTH) else Side.SOUTH
        not_finished_side = Side.opposite(finished_side)

        # The unfinished side also scores any seeds left in its holes.
        not_finished_side_seeds = self.board.get_seeds_in_store(
            not_finished_side)
        not_finished_side_seeds += sum(
            self.board.get_seeds(not_finished_side, hole)
            for hole in range(1, self.board.holes + 1))
        finished_side_seeds = self.board.get_seeds_in_store(finished_side)

        if finished_side_seeds > not_finished_side_seeds:
            return finished_side
        if finished_side_seeds < not_finished_side_seeds:
            return not_finished_side
        return None  # tie
Esempio n. 6
0
 def get_seeds_op(self, side: Side, hole: int):
     """Return the seed count of the hole directly opposite *hole*.

     :raises ValueError: if *hole* is outside ``1..self.holes``.
     """
     if not 1 <= hole <= self.holes:
         raise ValueError(
             'Hole number must be between 1 and number of holes')
     # Hole i on one side faces hole (holes + 1 - i) on the other.
     opposite_row = Side.get_index(Side.opposite(side))
     return self.board[opposite_row][self.holes + 1 - hole]
Esempio n. 7
0
 def test_side_opposite_is_correct(self):
     """Side.opposite maps each side onto the other one."""
     for side, expected in ((Side.NORTH, Side.SOUTH),
                            (Side.SOUTH, Side.NORTH)):
         self.assertEqual(Side.opposite(side), expected)
Esempio n. 8
0
def _run_game(player: Player, state: MancalaEnv):
    """Drive one game over the text protocol, recording observed states.

    Loops reading protocol messages: START fixes our side (SOUTH when we
    move first, NORTH otherwise), STATE applies the reported move to the
    local environment and answers with our next move when it is our turn,
    and END saves the collected (state, action) observations to .npy
    files, split into ours / theirs / both.

    NOTE(review): relies on module-level names `protocol`, `parser`,
    `np` and `_checkpoint_file_path` being defined — confirm at the top
    of the file (not visible in this chunk).
    """
    our_agent_states = []
    their_agent_states = []
    both_agent_states = []
    our_side = Side.SOUTH
    while True:
        msg = protocol.read_msg()
        try:
            msg_type = protocol.get_msg_type(msg)
            if msg_type == MsgType.START:
                first = protocol.interpret_start_msg(msg)
                if first:
                    move = player.get_play(state)
                    protocol.send_msg(protocol.create_move_msg(move.index))
                else:
                    our_side = Side.NORTH
            elif msg_type == MsgType.STATE:
                move_turn = protocol.interpret_state_msg(msg)
                # Move 0 is the pie move: the sides swap.
                if move_turn.move == 0:
                    our_side = Side.opposite(our_side)

                move_to_perform = Move(state.side_to_move, move_turn.move)

                # Record the observation BEFORE mutating the state.
                observed_state = ObservedState(state=state,
                                               action_taken=move_to_perform)
                both_agent_states.append(observed_state)
                if state.side_to_move == our_side:
                    our_agent_states.append(observed_state)
                else:
                    their_agent_states.append(observed_state)

                state.perform_move(move_to_perform)
                if not move_turn.end:
                    if move_turn.again:
                        move = player.get_play(state)
                        # pie rule; optimal move is to swap
                        if move.index == 0:
                            protocol.send_msg(protocol.create_swap_msg())
                        else:
                            protocol.send_msg(
                                protocol.create_move_msg(move.index))

            elif msg_type == MsgType.END:
                # Persist the three observation streams, keyed by run
                # number and category taken from the CLI arguments.
                args = parser.parse_args()
                run_id = '%06d' % args.run_number
                run_category = args.category

                _our_agent_file_path = _checkpoint_file_path + "/our-agent/" + run_category + run_id
                _their_agent_file_path = _checkpoint_file_path + "/their-agent/" + run_category + run_id
                _both_agent_file_path = _checkpoint_file_path + "/both-agent/" + run_category + run_id

                np.save(file=_our_agent_file_path,
                        arr=np.array(our_agent_states))
                np.save(file=_their_agent_file_path,
                        arr=np.array(their_agent_states))
                np.save(file=_both_agent_file_path,
                        arr=np.array(both_agent_states))
                break
            else:
                print("Not sure what I got " + str(msg_type))
        except InvalidMessageException as _e:
            print(str(_e))
Esempio n. 9
0
 def compute_final_reward(self, side: Side):
     """Return the end-of-game reward for *side*: its store's seed
     count minus the opponent's."""
     our_store = self.board.get_seeds_in_store(side)
     their_store = self.board.get_seeds_in_store(Side.opposite(side))
     return our_store - their_store
Esempio n. 10
0
    def make_move(board: Board, move: Move, north_moved: bool) -> Side:
        """Apply *move* to *board* in place and return the side to move next.

        Handles the pie move (index 0), sowing (skipping the opponent's
        store), the capture rule, and end-of-game seed collection.

        :param board: the board, mutated in place.
        :param move: the move to perform; index 0 is the pie move.
        :param north_moved: whether NORTH has already made its first move.
        :raises ValueError: if the move is illegal for this position.
        :return: the Side that moves next.
        """
        if not MancalaEnv.is_legal_action(board, move, north_moved):
            raise ValueError(
                'Move is illegal: Board: \n {} \n Move:\n {}/{} \n {}'.format(
                    board, move.index, move.side, north_moved))

        # This is a pie move
        if move.index == 0:
            MancalaEnv.switch_sides(board)
            return Side.opposite(move.side)

        seeds_to_sow = board.get_seeds(move.side, move.index)
        board.set_seeds(move.side, move.index, 0)

        holes = board.holes
        # Place seeds in all holes excepting the opponent's store
        receiving_holes = 2 * holes + 1
        # Rounds needed to sow all the seeds
        rounds = seeds_to_sow // receiving_holes
        # Seeds remaining after all the rounds
        remaining_seeds = seeds_to_sow % receiving_holes

        # Sow the seeds for the full rounds
        if rounds != 0:
            for hole in range(1, holes + 1):
                board.add_seeds(Side.NORTH, hole, rounds)
                board.add_seeds(Side.SOUTH, hole, rounds)
            board.add_seeds_to_store(move.side, rounds)

        # Sow the remaining seeds one by one; sow_hole == 0 encodes
        # "the last seed landed in the mover's store".
        sow_side = move.side
        sow_hole = move.index
        for _ in range(remaining_seeds):
            sow_hole += 1
            if sow_hole == 1:
                # We just wrapped around from the store to hole 1, which
                # belongs to the other side.
                sow_side = Side.opposite(sow_side)
            if sow_hole > holes:
                if sow_side == move.side:
                    # Drop a seed in the mover's own store.
                    sow_hole = 0
                    board.add_seeds_to_store(sow_side, 1)
                    continue
                else:
                    # Skip the opponent's store entirely.
                    sow_side = Side.opposite(sow_side)
                    sow_hole = 1
            board.add_seeds(sow_side, sow_hole, 1)

        # Capture the opponent's seeds from the opposite hole if the last seed
        # is placed in an empty hole and there are seeds in the opposite hole
        if sow_side == move.side and sow_hole > 0 \
                and board.get_seeds(sow_side, sow_hole) == 1 \
                and board.get_seeds_op(sow_side, sow_hole) > 0:
            board.add_seeds_to_store(
                move.side, 1 + board.get_seeds_op(sow_side, sow_hole))
            board.set_seeds(move.side, sow_hole, 0)
            board.set_seeds_op(move.side, sow_hole, 0)

        # If the game is over, collect the seeds not in the store and put them there
        game_over = MancalaEnv.game_over(board)
        if game_over:
            finished_side = Side.NORTH if MancalaEnv.holes_empty(
                board, Side.NORTH) else Side.SOUTH
            seeds = 0
            collecting_side = Side.opposite(finished_side)
            for hole in range(1, board.holes + 1):
                seeds += board.get_seeds(collecting_side, hole)
                board.set_seeds(collecting_side, hole, 0)
            board.add_seeds_to_store(collecting_side, seeds)

        # Return the side which is next to move
        # NOTE(review): the `north_moved` guard appears to deny SOUTH an
        # extra turn before NORTH's first move (pie-rule window) — confirm
        # against the game rules.
        if sow_hole == 0 and (move.side == Side.NORTH or north_moved):
            return move.side  # Last seed was placed in the store, so side moves again
        return Side.opposite(move.side)