def self_play(storage, player1, player2=None, explore=True, num_games=1, joseki=False):
    """Play ``num_games`` games and record every visited position in ``storage``.

    With only ``player1`` given, a single search tree plays both sides
    (ordinary self-play). When ``player2`` is also given, each player gets
    its own tree and the two alternate moves (evaluation mode).

    Relies on module-level names defined elsewhere in the file:
    ``parallell``, ``sess``, ``v_resign``, ``observe_games``,
    ``search_depth``, ``discount_rs``, ``santorini`` and ``M``. The most
    recently created trees are published as the globals ``P1`` and ``P2``
    to facilitate inspection or debugging.

    Args:
        storage: Object whose ``add(state, pi, value, legal_moves)`` method
            is called once per recorded position after each game finishes.
        player1: Handed to ``M.MCTS`` to build the first search tree.
        player2: Optional; when not ``None`` it is handed to ``M.MCTS`` to
            build a second tree and evaluation mode is used.
        explore: Forwarded unchanged to ``M.MCTS``.
        num_games: Number of games to play.
        joseki: Accepted for interface compatibility; not used in this
            function.

    Returns:
        ``game.outcome`` of the last game played, or ``None`` when
        ``num_games`` is zero or negative (no game is played).
    """
    global P1, P2

    # Ordinary self-play uses a single tree structure, whereas evaluation
    # uses two different ones.
    evaluation = player2 is not None

    # Bound up front so that playing zero games returns None instead of
    # failing on an unbound local at the final return.
    z = None

    for n in range(num_games):
        if not parallell:
            print("Self-play game: %s" % n)

        # Initialize game structure.
        game = santorini.Game()

        # Initialize players with networks and tree structures, and make the
        # structures globally available for inspection.
        p1 = M.MCTS(game, player1, sess, explore)
        P1 = p1
        if evaluation:
            p2 = M.MCTS(game, player2, sess, explore)
            P2 = p2
            players = [p1, p2]

        # Store the state history locally first: the outcome must be known
        # before the positions can be added to the global storage.
        temp_history = []
        done = False
        while not done:
            if evaluation:
                player = game.turn_count % 2
                tree = players[player]
                other_tree = players[(player + 1) % 2]
            else:
                tree = p1

            # Execute tree search and make move.
            t0 = time.time()
            # NOTE(review): the original assigned this call's result to
            # `done`, but that value was always overwritten by `game.done`
            # below before being read. The call is kept for whatever side
            # effects it has -- confirm whether resignation was meant to
            # end the game here.
            tree.consider_resigning(v_resign, observe_games)
            a, pi_s, P, v = tree.run_simulation(search_depth)
            temp_history.append(
                [game.stack_s(), pi_s, game.legal_moves(binaryV=True)]
            )

            if evaluation:
                # The opposing tree is told about the move both before and
                # after it is applied to the shared game object.
                other_tree.prepare_adversarial_move(a)
            game.move(a)
            done = game.done
            if evaluation:
                other_tree.finish_adversarial_move(a)
            tree.prepare_next_move()

            if observe_games:
                for _ in range(10):
                    print("\n")
                print("P (predicted tree search probs):\n%s\n\n" % np.reshape(P, [5,5]),
                      "pi (actual tree search probs):\n%s\n\n" % np.reshape(pi_s, [5,5]),
                      "v: %s\n" % v,
                      "Chosen move: %s\n" % a,
                      "Overall game state:\n%s\n\n" % game.render())
                # NOTE(review): the original layout was lost; the timing
                # output is assumed to belong to the observation branch.
                print("time: ", time.time()-t0)

        z = game.outcome

        # Store data: the first position gets the largest step count passed
        # to discount_rs, the last one gets 1.
        t = len(temp_history)
        for offset, (state, pi, legal) in enumerate(temp_history):
            storage.add(state, pi, discount_rs(z, t - offset), legal)

    # NOTE(review): assumed to sit after the games loop, i.e. only the last
    # game's outcome is returned -- confirm against callers.
    return z