def get_play(self, statcache):
    """
    Return the best play, after simulating possible plays and updating
    plays and wins stats.

    The most recent state in ``statcache.past_states`` is used as the
    position to move from.  Simulations are run via ``run_simulation``
    until ``self.calculation_time`` has elapsed, then every legal play is
    scored by the win ratio recorded for the state it leads to.

    Args:
        statcache: object exposing ``past_states`` (a sequence of game
            states) and ``bot_stats(player)``, whose result carries the
            ``plays`` and ``wins`` dictionaries keyed by
            ``(player, state)``.

    Returns:
        ``[]`` when there is no legal play, the only legal play when there
        is exactly one, otherwise the legal play with the highest recorded
        win ratio.
    """
    game_state = statcache.past_states[-1]
    pwp = Game.player_with_priority(game_state)
    legal = Game.legal_plays(game_state)

    # Bail out early if there is no real choice to be made.
    if not legal:
        return []
    if len(legal) == 1:
        return legal[0]

    games = 0
    begin = datetime.datetime.utcnow()
    spinner = itertools.cycle(['-', '/', '|', '\\'])
    sys.stdout.write("Thinking ")
    while datetime.datetime.utcnow() - begin < self.calculation_time:
        self.run_simulation(statcache)
        # Draw the next spinner frame, then queue a backspace so the
        # following frame overwrites it.
        sys.stdout.write(next(spinner))
        sys.stdout.flush()
        sys.stdout.write('\b')
        games += 1

    # NOTE(review): the rate is computed against self.simulation_time while
    # the loop is bounded by self.calculation_time -- presumably the same
    # budget expressed in seconds; confirm against the constructor.
    print("SIMULATED {} playouts/s ({} playouts)".format(
        games * 1.0 / self.simulation_time, games))

    CURSOR_UP_ONE = '\x1b[1A'
    ERASE_LINE = '\x1b[2K'
    first_moving = pwp == 0
    if first_moving:
        print(ERASE_LINE + CURSOR_UP_ONE)

    # Build (play, resulting state) pairs with move printing switched off
    # while the candidate plays are applied.
    moves_states = []
    game_state = Game.set_print_moves(game_state, False)
    for p in legal:
        new_state = Game.apply_move(game_state, p)
        new_state = decarded_state(new_state)
        moves_states.append((p, tuple(new_state)))
    game_state = Game.set_print_moves(game_state, True)

    player = Game.acting_player(game_state)
    stats = statcache.bot_stats(pwp)
    wins, plays = stats.wins, stats.plays

    # Pick the move with the highest percentage of wins.  States without
    # stats score 0 (0 wins over a default of 1 play).  Ties on the ratio
    # fall through to comparing the plays themselves.
    # NOTE: a per-move stats display gated on self.show_simulation_results
    # used to live here as disabled code; it has been dropped.
    _, move = max(
        (wins.get((player, S), 0) * 1.0 / plays.get((player, S), 1), p)
        for p, S in moves_states)
    return move
def run_simulation(self, statcache):
    """
    Play out one simulated game from the current state and record it.

    Starting from the most recent state in ``statcache.past_states``, up to
    ``self.max_moves`` moves are applied.  A move is chosen at random when
    any legal move lacks an entry in the stats; otherwise the UCB1 formula
    (win ratio plus ``self.C * sqrt(log(total) / plays)``) selects it when
    every candidate state has a non-zero play count, and a random candidate
    is taken if not.  The first unseen ``(player, state)`` reached is added
    to the stats, and at the end every visited entry that is tracked gets
    its play count (and, for the winner, win count) incremented.

    Args:
        statcache: object exposing ``past_states`` and
            ``bot_stats(player)``; the latter must provide the mutable
            ``plays``, ``wins`` and ``legal_moves_cache`` dictionaries,
            which this method updates in place.

    Returns:
        None.
    """
    state = statcache.past_states[-1]
    state = Game.set_print_moves(state, False)
    pwp = Game.player_with_priority(state)

    # A bit of an optimization here, so we have a local
    # variable lookup instead of an attribute access each loop.
    stats = statcache.bot_stats(pwp)
    plays = stats.plays
    wins = stats.wins
    legal_moves_cache = stats.legal_moves_cache

    visited_states = set()
    player = Game.acting_player(state)
    # Stays None only if the playout stops before any move is applied; in
    # that case visited_states is empty and it is never compared.
    winner = None

    expand = True
    for t in range(1, self.max_moves + 1):
        if state not in legal_moves_cache:
            legal_moves_cache[state] = Game.legal_plays(state)
        legal = legal_moves_cache[state]

        # Nothing can be played from here: end the playout instead of
        # falling into the UCB1 branch, where log(0) would raise.
        if not legal:
            break

        # Each entry is (play, resulting state, resulting state); the
        # third slot is unpacked as `ended_game` but is not used.
        # NOTE(review): membership is tested with (p[1], state), whereas
        # entries are stored below under (player, decarded state) --
        # confirm these keys are meant to line up.
        moves_states = []
        play_randomly = False
        for p in legal:
            if (p[1], state) in plays:
                new_state = Game.apply_move(state, p)
                moves_states.append((p, new_state, new_state))
            else:
                play_randomly = True
                break

        if play_randomly:
            move = choice(legal)
            state = Game.apply_move(state, move)
        elif all(plays.get((player, S)) for _p, S, _ended in moves_states):
            # If we have stats on all of the legal moves here, use them.
            log_total = log(
                sum(plays[(player, S)] for _p, S, _ended in moves_states))
            # `* 1.0` keeps the win ratio a float under Python 2 as well.
            _value, move, state = max(
                ((wins[(player, S)] * 1.0 / plays[(player, S)]) +
                 self.C * sqrt(log_total / plays[(player, S)]), p, S)
                for p, S, _ended in moves_states)
        else:
            # Otherwise, just make an arbitrary decision.
            move, state, _ended = choice(moves_states)

        # `player` here and below refers to the player
        # who moved into that particular state.
        state_clone = decarded_state(state)
        if expand and (player, state_clone) not in plays:
            # Only the first unseen state of a playout is added.
            expand = False
            plays[(player, state_clone)] = 0
            wins[(player, state_clone)] = 0

        visited_states.add((player, state_clone))

        player = Game.acting_player(state)
        winner = Game.winner(state)
        if winner >= 0:
            break

    # Back-propagate the result to every visited entry that is tracked.
    for mover, visited in visited_states:
        if (mover, visited) not in plays:
            continue
        plays[(mover, visited)] += 1
        if mover == winner:
            wins[(mover, visited)] += 1