Code Example #1
def expectiminimax(state, depth):
    """
    Returns the expectiminimax value of a state down to a certain depth according to
    some evaluation function. Recurses by calling calculate_value on possible transitions.
    The calculate_value function acts as the algorithm's "chance node."
    """
    if depth == 0:
        return evaluate(state)

    winner = state.battle_is_finished()
    if winner:
        if winner == 1:  # we won
            return 10000
        else:
            return -10000

    else:
        transitions = get_transitions(state)
        value_of_transitions = {}
        for transition in transitions:
            value_of_transitions[transition] = calculate_value(
                state, transition, depth - 1)

        move, value = get_dominant_move(
            generate_payoff_matrix(value_of_transitions))

        return value
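
The calculate_value "chance node" is not shown in this snippet. As a minimal sketch, assuming the StateMutator and get_all_state_instructions API used in Examples #5 and #6, it would take the expectation over a transition's weighted instruction sets and recurse back into expectiminimax:

import copy

def calculate_value(state, transition, depth):
    # Hypothetical sketch: weight each possible outcome of the
    # (user_move, opponent_move) transition by its probability and
    # recurse into expectiminimax on the resulting state.
    mutator = StateMutator(copy.deepcopy(state))
    value = 0
    for instructions in get_all_state_instructions(
            mutator, transition[0], transition[1]):
        mutator.apply(instructions.instructions)
        value += instructions.percentage * expectiminimax(mutator.state, depth)
        mutator.reverse(instructions.instructions)
    return value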
Code Example #2
File: main.py  Project: saifshaikh48/showdown
    def run(self, times):
        """
        Top-level function that samples the tree the given number of times.
        Params:
            - times: number of times to sample this tree
        """
        for _ in range(times):
            self.sample(evaluate(self.state))
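
A hedged usage sketch: assuming the surrounding class is a search-tree node built from a battle state (as in Example #4, where nodes track wins, total, and children), the tree might be sampled and a move read off like this. The Node name and sample count are illustrative:

root = Node(state)    # hypothetical constructor wrapping a battle state
root.run(times=1000)  # draw 1000 Monte Carlo samples from the root
best_transition, best_child = max(
    root.children.items(),
    key=lambda kv: kv[1].wins / kv[1].total)  # highest observed win rate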
Code Example #3
    async def find_best_move(
        self,
        agent
    ):  # agent is required below, so the None default was dropped
        # called at the start of each turn, even when this side does not go first?
        state = self.create_state()
        # all valid actions; already accounts for struggle and switches
        my_options = self.get_all_options()[0]

        # all switch options, even if fainted or self
        all_switches = []
        for pkmn in self.all_pokemon:
            all_switches.append("{} {}".format(constants.SWITCH_STRING,
                                               pkmn.name))

        # split options into moves and switches (the switches list is not used right now)
        # e.g. volt switch
        moves = []
        switches = []
        for option in my_options:
            if option.startswith(constants.SWITCH_STRING + " "):
                switches.append(option)
            else:
                moves.append(option)

        if self.force_switch or not moves:
            return safest.find_best_move(self)
            # return format_decision(self, switches[0])

        # convert state to matrix
        matrix = self.state_to_vector()
        # totalEnemyHealth = evaluate2(state)
        reward = evaluate(state)
        # take a learning step on the scaled reward delta from the previous turn
        if agent.previous_state is not None:
            await agent.step(agent.previous_state, agent.previous_action,
                             (reward - agent.previous_reward) / 2000, matrix,
                             False)
            # await agent.step(agent.previous_state, agent.previous_action, (agent.previous_reward - totalEnemyHealth)/6, matrix, False)

        # pass through network and return choice
        idx, choice = agent.act(matrix, my_options, all_switches)
        agent.set_previous(matrix, idx, reward)

        return format_decision(self, choice)
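
The agent interface used above (act, step, set_previous, and the previous_* fields) is not part of this snippet. A minimal sketch of the bookkeeping those calls imply, with every name hypothetical and inferred only from this call site:

class Agent:
    # hypothetical skeleton inferred from find_best_move above
    def __init__(self):
        # one-step memory used to form (state, action, reward, next_state)
        # transitions across turns
        self.previous_state = None
        self.previous_action = None
        self.previous_reward = 0

    def set_previous(self, state_vector, action_idx, reward):
        # remember this turn's decision so the next turn can score it
        self.previous_state = state_vector
        self.previous_action = action_idx
        self.previous_reward = reward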
Code Example #4
File: main.py  Project: saifshaikh48/showdown
    def sample(self, initial_position, depth=0):
        """
        Selects a new node to add to a Monte Carlo search tree. If there are unexplored
        transitions, it opts to randomly choose one of those first. Otherwise, it
        recurs on the child node with the highest UCB value. Once a node is selected, a
        random playout is run. If the result of the playout is a win or leads to
        a favorable position, then a win is backpropagated up the tree.
        """
        self.total += 1

        if depth == MAX_DEPTH:
            if evaluate(self.state) >= initial_position:
                self.wins += 1
                return True
            else:
                return False

        winner = self.state.battle_is_finished()
        if winner:
            if winner == 1:
                self.wins += 1
                return True
            else:
                return False

        if len(self.children) == len(self.transitions):
            # there are no unexplored transitions
            next_child = self.get_highest_ucb()
            playout_successful = next_child.sample(initial_position, depth + 1)
        else:
            # generate a new node for a random unexplored transition
            unexplored_transitions = list(self.transitions -
                                          self.children.keys())
            chosen_transition = random.choice(unexplored_transitions)
            next_child = self.generate_next_child(chosen_transition)
            self.children[chosen_transition] = next_child
            playout_successful = next_child.random_playout(
                initial_position, depth + 1)

        if playout_successful:  # backprop via the boolean return of the child
            self.wins += 1
            return True

        return False
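
get_highest_ucb is not shown in this file. As a sketch, assuming each child tracks wins and total visits as in sample above (and that the standard math module is imported), a textbook UCB1 selection would look roughly like this; the exploration constant c is an assumption:

    def get_highest_ucb(self, c=1.414):
        # UCB1: win rate (exploitation) plus a visit-count bonus (exploration)
        def ucb(child):
            if child.total == 0:
                return float('inf')  # always try unvisited children first
            return (child.wins / child.total
                    + c * math.sqrt(math.log(self.total) / child.total))
        return max(self.children.values(), key=ucb)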
Code Example #5
File: main.py  Project: saifshaikh48/showdown
    def random_playout(self, initial_position, depth):
        """
        Random playout from this node. If max depth is reached, then
        the evaluation function of the state is compared against initial_position.
        If the evaluation of the state is better than the initial position, it is
        counted as a win, since the bot's position was improved.
        """
        self.total += 1

        mutator = StateMutator(copy.deepcopy(self.state))
        while True:
            if depth == MAX_DEPTH:
                if evaluate(mutator.state) >= initial_position:
                    self.wins += 1
                    return True
                else:
                    return False

            winner = mutator.state.battle_is_finished()
            if winner:
                if winner == 1:
                    self.wins += 1
                    return True
                else:
                    return False

            transition = random.choice(get_transitions(mutator.state))
            state_instructions = get_all_state_instructions(
                mutator, transition[0], transition[1])
            possible_instructions = [
                i.instructions for i in state_instructions
            ]
            weights = [i.percentage for i in state_instructions]
            choice = random.choices(possible_instructions, weights=weights)[0]
            mutator.apply(choice)

            depth += 1
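
generate_next_child, called from Example #4, is also not shown. A hypothetical sketch reusing the weighted-sampling pattern above, assuming a Node constructor that wraps a state:

    def generate_next_child(self, transition):
        # Hypothetical: sample one weighted outcome of the transition and
        # wrap the resulting state in a new child node.
        mutator = StateMutator(copy.deepcopy(self.state))
        state_instructions = get_all_state_instructions(
            mutator, transition[0], transition[1])
        weights = [i.percentage for i in state_instructions]
        choice = random.choices(
            [i.instructions for i in state_instructions], weights=weights)[0]
        mutator.apply(choice)
        return Node(mutator.state)  # Node is an assumed class name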
Code Example #6
File: select_best_move.py  Project: andy-vh/showdown
def get_payoff_matrix(mutator, user_options, opponent_options, depth=2, prune=True):
    """
    :param mutator: a StateMutator object representing the state of the battle
    :param user_options: options for the bot
    :param opponent_options: options for the opponent
    :param depth: the remaining depth before the state is evaluated
    :param prune: specify whether or not to prune the tree
    :return: a dictionary representing the potential move combinations and their associated scores
    """

    winner = battle_is_over(mutator.state)
    if winner:
        return {(constants.DO_NOTHING_MOVE, constants.DO_NOTHING_MOVE): evaluate(mutator.state) + WON_BATTLE * depth * winner}

    depth -= 1

    # if the battle is not over, but the opponent has no moves - we want to return the user options as moves
    # this is a special case in a random battle where the opponent's pokemon has fainted, but the opponent still
    # has reserves left that are unseen
    if opponent_options == [constants.DO_NOTHING_MOVE] and mutator.state.opponent.active.hp == 0:
        return {(user_option, constants.DO_NOTHING_MOVE): evaluate(mutator.state) for user_option in user_options}

    state_scores = dict()

    best_score = float('-inf')
    for i, user_move in enumerate(user_options):
        worst_score_for_this_row = float('inf')
        skip = False

        # opponent_options can change during the loop
        # using opponent_options[:] makes a copy when iterating to ensure no funny-business
        for j, opponent_move in enumerate(opponent_options[:]):
            if skip:
                state_scores[(user_move, opponent_move)] = float('nan')
                continue

            score = 0
            state_instructions = get_all_state_instructions(mutator, user_move, opponent_move)
            if depth == 0:
                for instructions in state_instructions:
                    mutator.apply(instructions.instructions)
                    t_score = evaluate(mutator.state)
                    score += (t_score * instructions.percentage)
                    mutator.reverse(instructions.instructions)

            else:
                for instructions in state_instructions:
                    this_percentage = instructions.percentage
                    mutator.apply(instructions.instructions)
                    next_turn_user_options, next_turn_opponent_options = mutator.state.get_all_options()
                    safest = pick_safest(get_payoff_matrix(mutator, next_turn_user_options, next_turn_opponent_options, depth=depth, prune=prune))
                    score += safest[1] * this_percentage
                    mutator.reverse(instructions.instructions)

            state_scores[(user_move, opponent_move)] = score

            if score < worst_score_for_this_row:
                worst_score_for_this_row = score

            if prune and score < best_score:
                skip = True

                # most of the time in Pokemon, an opponent's move that causes a prune here
                # will cause a prune elsewhere; move it to the front of the list to prune faster
                opponent_options = move_item_to_front_of_list(opponent_options, opponent_move)

        if worst_score_for_this_row > best_score:
            best_score = worst_score_for_this_row

    return state_scores
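
A hedged usage sketch of the whole routine: judging from how pick_safest is used inside the recursion above (safest[1] is the score), it appears to return a (move pair, score) tuple, so a caller might reduce the matrix to the maximin move like this. The mutator and option lists come from the battle engine:

user_options, opponent_options = mutator.state.get_all_options()
scores = get_payoff_matrix(mutator, user_options, opponent_options,
                           depth=2, prune=True)
safest = pick_safest(scores)  # (move pair, score), per its use above
bot_move = safest[0][0]       # the user's half of the safest (user, opponent) pair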