コード例 #1
0
def evaluate_players(p1: Player,
                     p2: Player,
                     games_per_battle=100,
                     num_battles=100):
    """Run a series of battles between two players and collect the results.

    A new ``tf.Session`` is registered with ``TFSessionManager`` and the
    global variables are initialised before the first battle. The registered
    session is reset to ``None`` when the function exits, including when a
    battle raises.

    Args:
        p1: First player; forwarded to ``battle`` as its first argument.
        p2: Second player; forwarded to ``battle`` as its second argument.
        games_per_battle: Forwarded to ``battle`` as its third argument
            (presumably the number of games played per battle -- confirm
            against ``battle``).
        num_battles: Number of times ``battle`` is called.

    Returns:
        A tuple ``(game_number, p1_wins, p2_wins, draws)`` of four lists with
        one entry per battle. ``game_number`` holds the 1-based battle index;
        the other three hold the three values returned by ``battle``, in the
        order it returns them.
    """
    p1_wins = []
    p2_wins = []
    draws = []
    game_number = []

    TFSessionManager.set_session(tf.Session())
    try:
        TFSessionManager.get_session().run(tf.global_variables_initializer())

        for battle_index in range(1, num_battles + 1):
            # The fourth positional argument is always False here; its
            # meaning is defined by ``battle`` (not visible in this block).
            p1win, p2win, draw = battle(p1, p2, games_per_battle, False)
            p1_wins.append(p1win)
            p2_wins.append(p2win)
            draws.append(draw)
            game_number.append(battle_index)
    finally:
        # Always unregister the session so a failed battle does not leave a
        # stale session behind for later callers.
        TFSessionManager.set_session(None)

    return game_number, p1_wins, p2_wins, draws
コード例 #2
0
ファイル: reward_sweep.py プロジェクト: zhengma/tic-tac-toe
from tic_tac_toe.RndMinMaxAgent import RndMinMaxAgent
from tic_tac_toe.DirectPolicyAgent import DirectPolicyAgent

# Lower and upper integer bounds for the reward values used by the loops below.
min_reward, max_reward = -3, 3

# Count of integers in the closed interval [min_reward, max_reward].
num_reward_steps = max_reward - min_reward + 1

# Square num_reward_steps x num_reward_steps array, initially all zeros.
rewards = np.zeros((num_reward_steps,) * 2)

for loss_reward in range(min_reward, max_reward):
    for draw_reward in range(loss_reward + 1, max_reward + 1):

        tf.reset_default_graph()
        TFSessionManager.set_session(tf.Session())

        sess = TFSessionManager.get_session()

        nnplayer = DirectPolicyAgent("PolicyLearner1",
                                     loss_value=loss_reward,
                                     draw_value=draw_reward)
        rm_player = RndMinMaxAgent()

        sess.run(tf.global_variables_initializer())

        game_number, p1_wins, p2_wins, draws = evaluate_players(
            nnplayer, rm_player, num_battles=1000,
            silent=True)  # , num_battles = 20)

        print("With loss reward {} and draw reward {} we get draws: {}".format(