Beispiel #1
0
def play_n_plot():
    """Pit two random players against each other and plot the outcome.

    Runs 100 battles via ``evaluate_players`` and draws the per-battle
    draw (red), player-1-win (green) and player-2-win (blue) curves.
    """
    player_a, player_b = RandomPlayer(), RandomPlayer()
    battle_no, wins_a, wins_b, ties = evaluate_players(
        player_a, player_b, num_battles=100)

    # Single plot() call with three x/y/format triples, one per curve.
    curves = plt.plot(battle_no, ties, 'r-',
                      battle_no, wins_a, 'g-',
                      battle_no, wins_b, 'b-')

    plt.show()
Beispiel #2
0
    # Sweep draw_reward over (loss_reward, max_reward].  This fragment is
    # the body of an outer loop (presumably over loss_reward) that is not
    # visible here; loss_reward, max_reward, min_reward and rewards are
    # all defined outside this view -- TODO confirm against the full file.
    for draw_reward in range(loss_reward + 1, max_reward + 1):

        # Fresh TF graph and session per (loss_reward, draw_reward)
        # combination so successive runs do not share variables.
        tf.reset_default_graph()
        TFSessionManager.set_session(tf.Session())

        sess = TFSessionManager.get_session()

        # Agent trained with the reward pair under test.
        nnplayer = DirectPolicyAgent("PolicyLearner1",
                                     loss_value=loss_reward,
                                     draw_value=draw_reward)
        rm_player = RndMinMaxAgent()

        sess.run(tf.global_variables_initializer())

        game_number, p1_wins, p2_wins, draws = evaluate_players(
            nnplayer, rm_player, num_battles=1000,
            silent=True)  # , num_battles = 20)

        print("With loss reward {} and draw reward {} we get draws: {}".format(
            loss_reward, draw_reward, draws[-1]))

        # Store the final draw ratio; indices are shifted by min_reward to
        # map reward values onto 0-based array coordinates.
        rewards[loss_reward - min_reward, draw_reward - min_reward] = draws[-1]

        # Drop the global session reference before the next iteration.
        TFSessionManager.set_session(None)

# Visualize the collected draw ratios as a heat map over the
# (loss_reward, draw_reward) grid accumulated in `rewards`.
fig, ax = plt.subplots()
im = ax.imshow(rewards)

# Tick positions for both axes; num_reward_steps is defined outside this
# fragment -- presumably max_reward - min_reward.  TODO confirm.
reward_range = np.arange(num_reward_steps + 1)

# We want to show all ticks...
Beispiel #3
0
# nnplayer = EGreedyNNQPlayer("QLearner1")#, learning_rate=0.001, win_value=10.0, loss_value=-10.0)
# nn2player = EGreedyNNQPlayer("QLearner2")#, learning_rate=0.001, win_value=10.0, loss_value=-10.0)
mm_player = MinMaxAgent()
rndplayer = RandomPlayer()
rm_player = RndMinMaxAgent()

# NOTE(review): `nnplayer` is used below (writer attach + evaluate_players)
# but its only definitions in this snippet are the commented-out lines
# above -- presumably it is created earlier in the full file; verify
# before running this fragment standalone.
TFSessionManager.set_session(tf.Session())

sess = TFSessionManager.get_session()
# TensorBoard writer bound to the session's graph; handed to the agent so
# it can emit per-game summaries during evaluation.
writer = tf.summary.FileWriter(TENSORLOG_DIR, sess.graph)
nnplayer.writer = writer

sess.run(tf.global_variables_initializer())

# game_number, p1_wins, p2_wins, draws = evaluate_players(rndplayer, nnplayer, num_battles=10000) #, num_battles = 20)
# game_number, p1_wins, p2_wins, draws = evaluate_players(rndplayer, nnplayer) #, num_battles = 20)
# game_number, p1_wins, p2_wins, draws = evaluate_players( mm_player, nnplayer, num_battles=300)  # , num_battles = 20)
game_number, p1_wins, p2_wins, draws = evaluate_players(
    nnplayer, rm_player, num_battles=1000,
    writer=writer)  # , num_battles = 20)
# game_number, p1_wins, p2_wins, draws = evaluate_players(nnplayer, rndplayer, num_battles=100)  # , num_battles = 20)

# game_number, p1_wins, p2_wins, draws = evaluate_players(mm_player, nn2player, num_battles=100)  # , num_battles = 20)
writer.close()

# Draws in red, player-1 wins in green, player-2 wins in blue.
p = plt.plot(game_number, draws, 'r-', game_number, p1_wins, 'g-', game_number,
             p2_wins, 'b-')

plt.show()
# Release the global session reference.
TFSessionManager.set_session(None)
Beispiel #4
0
# `train` is defined outside this fragment; when False, restore a
# previously saved model instead of initializing fresh variables.
if not train:
    TFSessionManager.load_session('models/SimpleNNQPlayer')

sess = TFSessionManager.get_session()

if train:
    sess.run(tf.global_variables_initializer())

# num battles
nb = 500
# games per battle
gpb = 100

# dddplayer / rmmplayer are created elsewhere in the full file --
# presumably a deep Q agent vs. a randomized min-max agent; confirm.
game_number, p1_wins, p2_wins, draws = evaluate_players(dddplayer,
                                                        rmmplayer,
                                                        num_battles=nb,
                                                        games_per_battle=gpb)

if train:
    TFSessionManager.save_session('models/models_session2')

# Per-battle outcome ratios: draws in grey, player 1 red, player 2 yellow.
plt.plot(game_number, draws, color=(0.7, 0.7, 0.7), label='draws')
plt.plot(game_number, p1_wins, 'r-', label='player 1')
plt.plot(game_number, p2_wins, 'y-', label='player 2')
plt.xlabel('battle iterations ({} games per battle)'.format(gpb))
plt.ylabel('battle winning ratio (%)')
plt.legend(loc='best')

plt.show()
TFSessionManager.set_session(None)
Beispiel #5
0
import os
import pickle  # used below via pickle.load; harmless even if imported earlier
import random

print('start')
# Load the Q-value table saved by a previous training run; close the
# handle once loaded (the original leaked it).
pickle_in = open('player1.pickle', 'rb')
qvals = pickle.load(pickle_in)
pickle_in.close()
print(os.stat('player1.pickle').st_size / (1024 * 1024))
# Q-values for the all-4s key -- presumably the empty-board encoding;
# TODO confirm against the player implementation.
print(qvals[(4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4)])
# Average number of zero-valued action entries per stored position.
total = sum(actions.count(0) for actions in qvals.values())
print(total / len(qvals))

discreetPlayer = DiscreetQValuesPlayer(training=False, qValues=qvals)
rndplayer = RandomPlayer()
game_number, p1_wins, p2_wins, draws, allPos = evaluate_players(
    discreetPlayer, rndplayer, games_per_battle=100000, num_battles=1)

# Of all positions encountered during evaluation, count how many the
# Q-table covers, and the zero-entry average over the covered ones.
total = 0
numSeen = 0
numNotSeen = 0
for key in allPos:
    if key in qvals:
        total += qvals[key].count(0)
        numSeen += 1
    else:
        numNotSeen += 1
print(total / numSeen)
print(numSeen)
print(numNotSeen)
print(numSeen / (numNotSeen + numSeen))
# Show a few sample positions from the evaluation run.
for i in range(3):
    print(random.choice(tuple(allPos)))
Beispiel #6
0
# Evaluate an ExpDoubleDuelQPlayer against a randomized min-max agent and
# plot the resulting win/draw curves.
from tic_tac_toe.ExpDoubleDuelQPlayer import ExpDoubleDuelQPlayer

# Fresh default graph so variables from earlier runs don't collide.
tf.reset_default_graph()

nnplayer = ExpDoubleDuelQPlayer(
    "QLearner1")  # , win_value=100.0, loss_value=-100.0)
# nn2player = EGreedyNNQPlayer("QLearner2", win_value=100.0, loss_value=-100.0)
# nnplayer = EGreedyNNQPlayer("QLearner1")#, learning_rate=0.001, win_value=10.0, loss_value=-10.0)
# nn2player = EGreedyNNQPlayer("QLearner2")#, learning_rate=0.001, win_value=10.0, loss_value=-10.0)
mm_player = MinMaxAgent()
rndplayer = RandomPlayer()
rm_player = RndMinMaxAgent()

# New global session; initialize all variables created above.
TFSessionManager.set_session(tf.Session())
TFSessionManager.get_session().run(tf.global_variables_initializer())

# game_number, p1_wins, p2_wins, draws = evaluate_players(rndplayer, nnplayer, num_battles=10000) #, num_battles = 20)
# game_number, p1_wins, p2_wins, draws = evaluate_players(rndplayer, nnplayer) #, num_battles = 20)
# game_number, p1_wins, p2_wins, draws = evaluate_players( mm_player, nnplayer, num_battles=300)  # , num_battles = 20)
game_number, p1_wins, p2_wins, draws = evaluate_players(
    rm_player, nnplayer, num_battles=300)  # , num_battles = 20)
# game_number, p1_wins, p2_wins, draws = evaluate_players(nnplayer, rndplayer, num_battles=100)  # , num_battles = 20)

# game_number, p1_wins, p2_wins, draws = evaluate_players(mm_player, nn2player, num_battles=100)  # , num_battles = 20)

# Draws in red, player-1 wins in green, player-2 wins in blue.
p = plt.plot(game_number, draws, 'r-', game_number, p1_wins, 'g-', game_number,
             p2_wins, 'b-')

plt.show()
# Release the global session reference.
TFSessionManager.set_session(None)
Beispiel #7
0
            # Decay the exploration probability after each finished game.
            self.random_move_prob *= self.random_move_decrease

            if self.writer is not None:
                # `summary` is produced earlier in this method (not visible
                # in this fragment); log it, then log the current
                # exploration probability as its own scalar summary.
                self.writer.add_summary(summary, self.game_counter)
                summary = tf.Summary(value=[
                    tf.Summary.Value(tag='Random_Move_Probability',
                                     simple_value=self.random_move_prob)
                ])
                self.writer.add_summary(summary, self.game_counter)

            # Run the copy op -- presumably syncing a target network with
            # the online network; confirm where graph_copy_op is built.
            TFSN.get_session().run(self.graph_copy_op)


# Stand-alone run: fresh graph, a deep player vs. a random player over
# 5 battles of 1000 games each, then plot the win curves.
tf.reset_default_graph()

nnplayer = deepPlayer("QLearner1")
rndplayer = RandomPlayer()

# New global session; initialize all variables created above.
TFSN.set_session(tf.Session())
TFSN.get_session().run(tf.global_variables_initializer())

game_number, p1_wins, p2_wins, draws = evaluate_players(rndplayer,
                                                        nnplayer,
                                                        games_per_battle=1000,
                                                        num_battles=5)

# Player-1 (random) wins in green, player-2 (deep) wins in blue.
p = plt.plot(game_number, p1_wins, 'g-', game_number, p2_wins, 'b-')

plt.show()
# Release the global session reference.
TFSN.set_session(None)