コード例 #1
0
def self_play(storage, player1, player2=None, explore=True, num_games=1, joseki=False):
    """Play MCTS-guided games and record every visited position in ``storage``.

    With only ``player1`` given, a single search tree plays both sides
    (ordinary self-play). When ``player2`` is also given, each player gets
    its own tree and the trees alternate moves based on ``game.turn_count``
    (evaluation mode).

    Besides its parameters, this function relies on module-level names:
    ``parallell``, ``sess``, ``v_resign``, ``observe_games`` and
    ``search_depth``, plus ``santorini``, ``M``, ``np``, ``time`` and
    ``discount_rs``. It also publishes the most recently created trees as
    the globals ``P1`` (and ``P2`` in evaluation mode) for inspection.

    Args:
        storage: Object exposing ``add(state, pi, value, legal_moves)``;
            called once per recorded position after each game has finished.
        player1: Forwarded to ``M.MCTS`` when building the first tree.
        player2: Optional; forwarded to ``M.MCTS`` for a second tree. Its
            presence switches the function into evaluation mode.
        explore: Forwarded unchanged to ``M.MCTS``.
        num_games: Number of games to play.
        joseki: Accepted for interface compatibility; not used here.

    Returns:
        ``game.outcome`` of the last game played, or ``None`` when no game
        was played (``num_games`` is zero or negative).
    """
    global P1, P2

    # Ordinary self-play uses a single tree structure whereas evaluation uses
    # two different ones. This depends only on the arguments, so decide once.
    evaluation = player2 is not None

    # Outcome of the most recent game; stays None if the loop never runs, so
    # the final return cannot hit an unbound local.
    z = None

    for n in range(num_games):
        if not parallell:
            print("Self-play game: %s" % n)

        # Initialize game structure
        game = santorini.Game()

        # Initialize players with networks and tree structures. The trees are
        # made globally available to facilitate inspection or debugging.
        p1 = M.MCTS(game, player1, sess, explore)
        P1 = p1

        if evaluation:
            p2 = M.MCTS(game, player2, sess, explore)
            P2 = p2
            players = [p1, p2]

        # Store state history, but don't add it to the global history yet as
        # the outcome has to be known first.
        temp_history = []

        done = False
        while not done:
            if evaluation:
                player = game.turn_count % 2
                tree = players[player]
                other_tree = players[(player + 1) % 2]
            else:
                tree = p1

            # Execute tree search and make move
            t0 = time.time()
            # NOTE(review): the return value used to be stored in ``done`` but
            # was always overwritten by ``game.done`` below before being read,
            # so it never ended a game. The call is kept for any side effects
            # it may have; confirm whether resignation should stop the game.
            tree.consider_resigning(v_resign, observe_games)
            a, pi_s, P, v = tree.run_simulation(search_depth)
            temp_history.append([game.stack_s(), pi_s, game.legal_moves(binaryV=True)])
            if evaluation:
                # The opposing tree is notified both before and after the
                # move is applied to the shared game object.
                other_tree.prepare_adversarial_move(a)
            game.move(a)
            done = game.done
            if evaluation:
                other_tree.finish_adversarial_move(a)
            tree.prepare_next_move()

            if observe_games:
                for _ in range(10):
                    print("\n")
                print("P (predicted tree search probs):\n%s\n\n" % np.reshape(P, [5,5]),
                      "pi (actual tree search probs):\n%s\n\n" % np.reshape(pi_s, [5,5]),
                      "v: %s\n" % v, 
                      "Chosen move: %s\n" % a,
                      "Overall game state:\n%s\n\n" % game.render())
                print("time: ", time.time()-t0)
        z = game.outcome

        # Store data. ``t`` counts down from len(temp_history) for the first
        # recorded position to 1 for the last one and is handed to
        # ``discount_rs`` together with the final outcome.
        countdown = range(len(temp_history), 0, -1)
        for t, (state, pi, legal) in zip(countdown, temp_history):
            storage.add(state, pi, discount_rs(z, t), legal)

    return z