def expectiminimax(state, depth):
    """
    Returns the expectiminimax value of a state down to a certain depth
    according to some evaluation function. Recurs by calling calculate_value
    on possible transitions. The calculate_value function acts as the
    algorithm's "chance node."
    """
    if depth == 0:
        return evaluate(state)

    winner = state.battle_is_finished()
    if winner:
        if winner == 1:  # we won
            return 10000
        else:
            return -10000
    else:
        transitions = get_transitions(state)
        value_of_transitions = {}
        for transition in transitions:
            value_of_transitions[transition] = calculate_value(
                state, transition, depth - 1)
        move, value = get_dominant_move(
            generate_payoff_matrix(value_of_transitions))
        return value
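# A minimal sketch of the calculate_value "chance node" referenced in the
# docstring above. Its implementation is not shown in the source; this version
# assumes the same StateMutator / get_all_state_instructions helpers used by
# the other functions in this section, and that a transition is a
# (user_move, opponent_move) pair. Each possible outcome of the transition is
# weighted by its probability before the recursion continues.
def calculate_value(state, transition, depth):
    mutator = StateMutator(copy.deepcopy(state))
    state_instructions = get_all_state_instructions(
        mutator, transition[0], transition[1])
    expected_value = 0
    for instructions in state_instructions:
        mutator.apply(instructions.instructions)
        # weight the value of the resulting state by its probability
        expected_value += instructions.percentage * expectiminimax(
            mutator.state, depth)
        mutator.reverse(instructions.instructions)
    return expected_value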
def run(self, times):
    """
    Top-level function that samples the tree the given number of times.

    Params:
    - times: number of times to sample this tree
    """
    for _ in range(times):
        self.sample(evaluate(self.state))
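# Illustrative usage of run(), assuming the tree node exposes the state,
# children, wins, and total fields used by sample() below. The constructor
# name shown here is hypothetical and not taken from the source.
def pick_transition_via_mcts(state, samples=1000):
    root = MonteCarloNode(state)  # hypothetical constructor
    root.run(samples)
    # choose the explored transition with the best observed win rate
    return max(root.children,
               key=lambda t: root.children[t].wins / max(root.children[t].total, 1))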
async def find_best_move(self, agent=None):
    # called to start the turn, even when the bot does not go first
    state = self.create_state()
    # all valid actions; already accounts for struggle and switches
    my_options = self.get_all_options()[0]

    # all switch options, even if the pokemon is fainted or already active
    all_switches = []
    for pkmn in self.all_pokemon:
        all_switches.append("{} {}".format(constants.SWITCH_STRING, pkmn.name))

    # split options into moves and switches (e.g. volt switch counts as a move);
    # the split is only used for the force-switch check below right now
    moves = []
    switches = []
    for option in my_options:
        if option.startswith(constants.SWITCH_STRING + " "):
            switches.append(option)
        else:
            moves.append(option)

    # fall back to the safest-move bot when forced to switch
    if self.force_switch or not moves:
        return safest.find_best_move(self)

    # convert the state to a feature vector for the network
    matrix = self.state_to_vector()
    reward = evaluate(state)

    # calculate the new reward and take a learning step on the previous transition
    if agent.previous_state is not None:
        await agent.step(agent.previous_state, agent.previous_action,
                         (reward - agent.previous_reward) / 2000, matrix, False)

    # pass the state through the network and return the chosen action
    idx, choice = agent.act(matrix, my_options, all_switches)
    agent.set_previous(matrix, idx, reward)
    return format_decision(self, choice)
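# The agent interface that find_best_move above relies on, reconstructed from
# the calls it makes (step, act, set_previous and the previous_* fields). The
# method bodies are placeholders; the actual network, action selection, and
# experience-replay logic are not shown in the source.
class BattleAgent:
    def __init__(self):
        self.previous_state = None
        self.previous_action = None
        self.previous_reward = 0

    async def step(self, state, action, reward, next_state, done):
        # store the transition and (optionally) train the network
        raise NotImplementedError

    def act(self, state_vector, options, all_switches):
        # return (action_index, chosen_option) for the current state
        raise NotImplementedError

    def set_previous(self, state_vector, action_index, reward):
        self.previous_state = state_vector
        self.previous_action = action_index
        self.previous_reward = reward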
def sample(self, initial_position, depth=0):
    """
    Selects a new node to add to a Monte Carlo search tree. If there are
    unexplored transitions it randomly chooses one of those first. Otherwise,
    it recurs on the child node with the highest UCB value. Once a node is
    selected, a random playout is run. If the result of the playout is a win
    or leads to a favorable position, a win is back-propagated up the tree.
    """
    self.total += 1
    if depth == MAX_DEPTH:
        if evaluate(self.state) >= initial_position:
            self.wins += 1
            return True
        else:
            return False

    winner = self.state.battle_is_finished()
    if winner:
        if winner == 1:
            self.wins += 1
            return True
        else:
            return False

    if len(self.children.keys()) == len(self.transitions):
        # there are no unexplored transitions
        next_child = self.get_highest_ucb()
        playout_successful = next_child.sample(initial_position, depth + 1)
    else:
        # generate a new node for a random unexplored transition
        unexplored_transitions = list(self.transitions - self.children.keys())
        chosen_transition = random.choice(unexplored_transitions)
        next_child = self.generate_next_child(chosen_transition)
        self.children[chosen_transition] = next_child
        playout_successful = next_child.random_playout(
            initial_position, depth + 1)

    if playout_successful:  # backprop via the boolean return of the child
        self.wins += 1
        return True
    return False
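# A minimal sketch of the UCB selection referenced above, assuming the
# standard UCB1 formula with an exploration constant of sqrt(2) and the math
# module imported. The real get_highest_ucb implementation may use a
# different constant or tie-breaking rule.
def get_highest_ucb(self):
    def ucb(child):
        if child.total == 0:
            return float('inf')
        exploit = child.wins / child.total
        explore = math.sqrt(2 * math.log(self.total) / child.total)
        return exploit + explore

    return max(self.children.values(), key=ucb)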
def random_playout(self, initial_position, depth):
    """
    Random playout from this node. If max depth is reached, the evaluation
    function of the state is compared against initial_position. If the
    evaluation of the state is better than the initial position, it is
    counted as a win, since the bot's position was improved.
    """
    self.total += 1
    mutator = StateMutator(copy.deepcopy(self.state))
    while True:
        if depth == MAX_DEPTH:
            if evaluate(mutator.state) >= initial_position:
                self.wins += 1
                return True
            else:
                return False

        winner = mutator.state.battle_is_finished()
        if winner:
            if winner == 1:
                self.wins += 1
                return True
            else:
                return False

        # pick a random transition and sample one of its possible outcomes,
        # weighted by the probability of each outcome occurring
        transition = random.choice(get_transitions(mutator.state))
        state_instructions = get_all_state_instructions(
            mutator, transition[0], transition[1])
        possible_instructions = [i.instructions for i in state_instructions]
        weights = [i.percentage for i in state_instructions]
        choice = random.choices(possible_instructions, weights=weights)[0]
        mutator.apply(choice)
        depth += 1
def get_payoff_matrix(mutator, user_options, opponent_options, depth=2, prune=True):
    """
    :param mutator: a StateMutator object representing the state of the battle
    :param user_options: options for the bot
    :param opponent_options: options for the opponent
    :param depth: the remaining depth before the state is evaluated
    :param prune: specify whether or not to prune the tree
    :return: a dictionary representing the potential move combinations and their associated scores
    """
    winner = battle_is_over(mutator.state)
    if winner:
        return {
            (constants.DO_NOTHING_MOVE, constants.DO_NOTHING_MOVE):
                evaluate(mutator.state) + WON_BATTLE * depth * winner
        }

    depth -= 1

    # if the battle is not over, but the opponent has no moves - we want to return the user options as moves
    # this is a special case in a random battle where the opponent's pokemon has fainted, but the opponent still
    # has reserves left that are unseen
    if opponent_options == [constants.DO_NOTHING_MOVE] and mutator.state.opponent.active.hp == 0:
        return {
            (user_option, constants.DO_NOTHING_MOVE): evaluate(mutator.state)
            for user_option in user_options
        }

    state_scores = dict()

    best_score = float('-inf')
    for i, user_move in enumerate(user_options):
        worst_score_for_this_row = float('inf')
        skip = False

        # opponent_options can change during the loop
        # using opponent_options[:] makes a copy when iterating to ensure no funny-business
        for j, opponent_move in enumerate(opponent_options[:]):
            if skip:
                state_scores[(user_move, opponent_move)] = float('nan')
                continue

            score = 0
            state_instructions = get_all_state_instructions(mutator, user_move, opponent_move)
            if depth == 0:
                for instructions in state_instructions:
                    mutator.apply(instructions.instructions)
                    t_score = evaluate(mutator.state)
                    score += (t_score * instructions.percentage)
                    mutator.reverse(instructions.instructions)
            else:
                for instructions in state_instructions:
                    this_percentage = instructions.percentage
                    mutator.apply(instructions.instructions)
                    next_turn_user_options, next_turn_opponent_options = mutator.state.get_all_options()
                    safest = pick_safest(
                        get_payoff_matrix(mutator, next_turn_user_options, next_turn_opponent_options,
                                          depth=depth, prune=prune))
                    score += safest[1] * this_percentage
                    mutator.reverse(instructions.instructions)

            state_scores[(user_move, opponent_move)] = score

            if score < worst_score_for_this_row:
                worst_score_for_this_row = score

            if prune and score < best_score:
                skip = True

                # MOST of the time in pokemon, an opponent's move that causes a prune will cause a prune elsewhere
                # move this item to the front of the list to prune faster
                opponent_options = move_item_to_front_of_list(opponent_options, opponent_move)

        if worst_score_for_this_row > best_score:
            best_score = worst_score_for_this_row

    return state_scores
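# A minimal sketch of the maximin selection that pick_safest performs on the
# payoff matrix returned above, assuming the keys are (user_move,
# opponent_move) tuples. The actual helper may differ, for example in how it
# handles the nan entries produced by pruning.
from collections import defaultdict

def pick_safest_sketch(score_lookup):
    # track the worst-case score of each user move across all opponent moves
    worst_case = defaultdict(lambda: float('inf'))
    for (user_move, opponent_move), score in score_lookup.items():
        if score < worst_case[user_move]:
            worst_case[user_move] = score
    # pick the user move whose worst-case outcome is best
    best_move = max(worst_case, key=lambda move: worst_case[move])
    return best_move, worst_case[best_move]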