Example #1
    def step(self, move=None):
        """
        Perform one step of the environment: agents in turn choose a move
        :param move: externally determined move to be performed by agent0 (useful for training)
        :return: reward accumulated in this step, whether the environment is in a terminal state, outcome code for agent 0
        """
        self.reward = 0
        self.steps += 1  # an illegal move still counts as a step

        # are there still pieces to be moved?
        if not helpers.get_poss_moves(self.board, team=0):
            self.reward += self.reward_loss
            self.score += self.reward
            return self.reward, True, -2  # -2: agent 0 lost (no pieces left to move)

        # move decided by agent or externally?
        if move is not None:
            agent_move = move  # this enables working with the environment in external functions (e.g. train.py)
        else:
            agent_move = self.agents[0].decide_move()

        # is move legal?
        if not helpers.is_legal_move(
                self.board, agent_move
        ):  # if illegal -> no change in env, receive reward_illegal
            self.reward += self.reward_illegal
            self.illegal_moves += 1
            # print("Warning: agent 1 selected an illegal move: {}".format(agent_move))
            self.score += self.reward
            done, won = self.goal_test()
            return self.reward, done, won  # environment does not change for illegal
        self.do_move(agent_move, team=0)
        self.move_count += 1

        # opponents move
        if self.opp_can_move:  # only if the opponent actually plays; killing its pieces wins only then (the opponent may e.g. consist of a flag only)

            # are there still pieces to be moved?
            if not helpers.get_poss_moves(self.board, team=1):
                self.reward += self.reward_win
                self.score += self.reward
                return self.reward, True, 2  # 2: agent 0 won (opponent has no moves left)
            opp_move = self.agents[1].decide_move()

            # is move legal?
            if not helpers.is_legal_move(
                    self.board, opp_move
            ):  # opponent is assumed to only perform legal moves
                pass
                # print("Warning: agent 1 selected an illegal move: {}".format(opp_move))

            self.do_move(opp_move, team=1)  # assuming only legal moves are selected
            self.move_count += 1

        done, won = self.goal_test()
        self.score += self.reward
        return self.reward, done, -1 + 2 * won
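Not part of the example above, but a minimal sketch of how step() is typically driven from an external training loop, under the assumption that the environment exposes only the (reward, done, won) interface shown here; run_episode, external_policy and max_steps are hypothetical names.

# Hedged sketch: drive the environment's step() loop from outside (e.g. a trainer).
# Only the (reward, done, won) return signature is taken from the example above.
def run_episode(env, external_policy=None, max_steps=1000):
    total_reward = 0
    for _ in range(max_steps):
        # move=None lets agent 0 decide itself; otherwise the external policy chooses
        move = external_policy(env) if external_policy is not None else None
        reward, done, won = env.step(move=move)
        total_reward += reward
        if done:
            # won is an outcome code: 2 / -2 when a side runs out of moves,
            # otherwise -1 + 2 * won from the final goal test
            return total_reward, won
    return total_reward, 0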
Example #2
    def min_val(self, board, current_reward, alpha, beta, depth):
        """
        Step of the minimizing player in the minimax algorithm. See max_val for documentation.
        """
        # this is what the opponent will think, the min-player

        # get my possible actions, then shuffle them to ensure randomness when no action
        # stands out as the best
        my_doable_actions = helpers.get_poss_moves(board, self.other_team)
        np.random.shuffle(my_doable_actions)

        # check for terminal-state scenario or maximum depth
        done, won = self.goal_test(my_doable_actions, board, max_val=False)
        if done or depth == 0:
            return current_reward + self.get_terminal_reward(done, won, depth), None

        val = float('inf')  # start at +inf so the first min comparison can only lower it
        best_action = None
        # iterate through all actions
        for action in my_doable_actions:
            board, fight_result = self.do_move(action, board=board, bookkeeping=False, true_gameplay=False)
            temp_reward = current_reward - self.add_temp_reward(fight_result)
            new_val = self.max_val(board, temp_reward, alpha, beta, depth-1)[0]
            if val > new_val:
                val = new_val
                best_action = action
            if val <= alpha:
                self.undo_last_move(board)
                return val, best_action
            beta = min(beta, val)
            board = self.undo_last_move(board)
        return val, best_action
Example #3
    def poss_actions(self, action_dim):
        """
        Convert the set of possible moves in the game into the set of actions available to the agent.
        :param action_dim: how many actions are possible for the agent
        :return: list of legal actions
        """
        poss_moves = helpers.get_poss_moves(self.board, self.team)  # which moves are possible in the game
        poss_actions = []
        all_actions = range(0, action_dim)
        for action in all_actions:
            move = self.action_to_move(action)  # convert every action to a move (which can be illegal)
            if move in poss_moves:              # only keep the actions whose moves are legal
                poss_actions.append(action)
        return poss_actions
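A sketch of how such an action list could be consumed, e.g. to restrict a greedy policy to legal actions. pick_legal_action and q_values are hypothetical; only poss_actions() is taken from the example above.

import numpy as np

# Hedged sketch: mask a Q-value vector with the legal actions from poss_actions().
def pick_legal_action(agent, q_values, action_dim):
    legal = agent.poss_actions(action_dim)   # indices of legal actions
    if not legal:
        return None                          # no legal action available
    masked = np.full(action_dim, -np.inf)    # illegal actions get -inf ...
    masked[legal] = q_values[legal]          # ... legal actions keep their Q-values
    return int(np.argmax(masked))            # greedy choice among legal actions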
Example #4
    def decide_move(self):
        """
        Given the maximum depth, copy the known board, randomly assign the enemy pieces while still
        respecting the current knowledge, and then decide the move via the minimax algorithm.
        :return: tuple of position tuples
        """
        possible_moves = helpers.get_poss_moves(self.board, self.team)
        next_action = None
        if possible_moves:
            values_of_moves = dict.fromkeys(possible_moves, 0)
            for move in possible_moves:
                for draw in range(self._nr_of_enemy_setups_to_draw):
                    curr_board = self.draw_consistent_enemy_setup(copy.deepcopy(self.board))
                    curr_board, _ = self.do_move(move, curr_board, bookkeeping=False, true_gameplay=False)
                    # average the board evaluation over all drawn enemy setups
                    values_of_moves[move] += (self.approximate_value_of_board(curr_board)
                                              / self._nr_of_enemy_setups_to_draw)
                    self.undo_last_move(curr_board)
            evaluations = list(values_of_moves.values())
            actions = list(values_of_moves.keys())
            next_action = actions[evaluations.index(max(evaluations))]
        return next_action
Example #5
    def max_val(self, board, current_reward, alpha, beta, depth):
        """
        Do the max player's step in the minimax algorithm. Check first if the given board is in
        a terminal state. If not, we will do each possible move once and send the process to
        min_val to do the min players step.
        :param board: the current board, numpy array
        :param current_reward: the current value the path has accumulated
        :param alpha: alpha threshold of the minimax alg
        :param beta: beta threshold of the minimax alg
        :param depth: the depth the process is at, integer
        :return: tuple of best value and an associated best_action (float, tuple)
        """
        # this is what the expectimax agent will think

        # get my possible actions, then shuffle them to ensure randomness when no action
        # stands out as the best
        my_doable_actions = helpers.get_poss_moves(board, self.team)
        np.random.shuffle(my_doable_actions)

        # check for terminal-state scenario
        done, won = self.goal_test(my_doable_actions, board, max_val=True)
        if done or depth == 0:
            return current_reward + self.get_terminal_reward(done, won, depth), None

        val = -float('inf')
        best_action = None
        for action in my_doable_actions:
            board, fight_result = self.do_move(action, board=board, bookkeeping=False, true_gameplay=False)
            temp_reward = current_reward + self.add_temp_reward(fight_result)
            new_val = self.min_val(board, temp_reward, alpha, beta, depth-1)[0]
            if val < new_val:
                val = new_val
                best_action = action
            if val >= beta:
                self.undo_last_move(board)
                best_action = action
                return val, best_action
            alpha = max(alpha, val)
            board = self.undo_last_move(board)
        return val, best_action
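max_val and min_val form a standard alpha-beta pair; presumably a decide_move on the same agent starts the recursion at the root. A sketch of such a root call, assuming a hypothetical self.max_depth attribute and an initial accumulated reward of 0:

# Hedged sketch: root call into the alpha-beta pair above.
def decide_move(self):
    value, best_action = self.max_val(
        self.board,                # search from the currently known board
        current_reward=0,          # nothing accumulated yet at the root
        alpha=-float('inf'),       # alpha starts at -infinity ...
        beta=float('inf'),         # ... and beta at +infinity
        depth=self.max_depth,      # hypothetical depth-limit attribute
    )
    return best_action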
Example #6
    def approximate_value_of_board(self, board):
        """
        Simulate the game from the given board many times, playing random moves up to a
        maximum number of turns, and evaluate each simulation by whether the agent won and
        by how many more pieces it has left than the opponent.
        :param board: the board to evaluate, numpy array
        :return: average evaluation over all simulations, float
        """
        evals = []
        for i in range(self._nr_iterations_of_game_sim):
            board_copy = copy.deepcopy(board)
            finished = False  # reset per simulation so every iteration actually simulates
            turn = 0          # team whose turn it is (0 or 1)
            turn_count = 0    # number of turns simulated so far in this iteration
            while not finished:
                actions = helpers.get_poss_moves(board_copy, turn)
                if actions:  # as long as actions are left to be done, we do them
                    move = random.choice(actions)
                    board_copy, _ = self.do_move(move, board_copy)
                # check whether the game is terminal or the turn limit is reached
                done, won = self.goal_test(actions, board_copy, turn)
                if done or turn_count > self._nr_of_max_turn_sim:
                    # bonus for this simulation: positive if we have more pieces left
                    # than the enemy at the end of the simulation, negative otherwise
                    my_team = self.get_team_from_board(board_copy, self.team)
                    enemy_team = self.get_team_from_board(board_copy, self.other_team)
                    bonus = (len(my_team) - len(enemy_team)) / 20
                    if done:
                        # -1 + 2*won equals -1 for won=False and 1 for won=True
                        evals.append(-1 + 2 * won + bonus)
                    else:
                        # turn limit reached without a terminal state
                        evals.append(bonus)
                    finished = True
                turn = (turn + 1) % 2
                turn_count += 1
        return sum(evals) / len(evals)
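The same rollout pattern stripped of the class context, to make the control flow explicit: play random moves up to a turn cap and score the end position as -1 + 2*won plus a piece-count bonus. Every helper below (legal_moves, apply_move, is_terminal, count_pieces) is a hypothetical placeholder, not part of the repository.

import random

# Hedged sketch of the Monte-Carlo rollout evaluation used above.
def rollout_value(board, n_rollouts, max_turns,
                  legal_moves, apply_move, is_terminal, count_pieces, my_team):
    evals = []
    for _ in range(n_rollouts):
        sim = board.copy()              # fresh copy per simulation
        team, turns = 0, 0              # team to move and turn counter, reset each rollout
        while True:
            moves = legal_moves(sim, team)
            if moves:
                sim = apply_move(sim, random.choice(moves))
            done, won = is_terminal(sim, moves, team)
            if done or turns > max_turns:
                # piece-difference bonus, negative if the enemy has more pieces left
                bonus = (count_pieces(sim, my_team) - count_pieces(sim, 1 - my_team)) / 20
                # -1 + 2*won maps a loss to -1 and a win to +1; turn-limit hits keep only the bonus
                evals.append((-1 + 2 * won + bonus) if done else bonus)
                break
            team = (team + 1) % 2
            turns += 1
    return sum(evals) / len(evals)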
Example #7
    def decide_move(self):
        """Pick a uniformly random legal move, or None if no move is possible."""
        actions = helpers.get_poss_moves(self.board, self.team)
        if not actions:
            return None
        else:
            return random.choice(actions)