Example #1
import sys

import numpy as np

from game import Checkers


def min_max_alpha_beta(game, turn, max_depth, alpha=-sys.float_info.max, beta=sys.float_info.max):
    """Minimax search with alpha-beta pruning; returns (score, best_move)."""
    best_score_move = None
    game.available_moves()
    moves = game.moves

    # No legal moves left: score the position as a draw.
    if not moves:
        return 0, None

    for move in moves:
        # Play the move on a copy so the caller's position is untouched.
        new_game = Checkers()
        new_game.board_state = np.array(game.board_state)  # copy, not a shared reference
        new_game.turn = game.turn
        new_game.moves_queen_with_out_capture = game.moves_queen_with_out_capture
        new_game.move(move)
        winner = new_game.win
        if winner != 0:
            # A decided game outweighs any heuristic score.
            return winner * 10000, move
        if max_depth <= 1:
            score = evaluate(new_game)
        else:
            score, _ = min_max_alpha_beta(new_game, -turn, max_depth - 1, alpha, beta)
        if turn > 0:
            # Maximizing player raises the lower bound alpha.
            if score > alpha:
                alpha = score
                best_score_move = move
        else:
            # Minimizing player lowers the upper bound beta.
            if score < beta:
                beta = score
                best_score_move = move
        if alpha >= beta:
            break  # prune: the opponent will never allow this line
    return alpha if turn > 0 else beta, best_score_move
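
The evaluate heuristic called at the depth limit is not shown on this page. A minimal sketch of such a function, assuming the +1/-1 piece encoding visible in the test boards of Example #6 (the queen encoding is a guess), is a plain material count:

import numpy as np

def evaluate(game):
    # Hypothetical stand-in for the project's evaluate():
    # material balance from the positive player's point of view.
    # Men are +1/-1 on board_state; if queens are encoded as +2/-2,
    # they are automatically weighted double.
    return float(np.sum(game.board_state))

# Example call (depth-4 search for the positive player):
# score, move = min_max_alpha_beta(Checkers(), turn=1, max_depth=4)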
Example #2
from dqn import DQN
from coach import Coach
from game import Checkers
from arena import Arena

import torch
from tqdm import tqdm

game = Checkers()
dqn = DQN()

NUM_EPOCHS = 80
NUM_TRAIN_GAMES = 200
NUM_EVAL_GAMES = 20
HALF = NUM_EVAL_GAMES // 2
TRAIN_EPSILON = 0.4
GAMMA = 0.8
EVAL_EPSILON = 0.01
LR = 0.0001
PATH = "models/test.pt"

coach = Coach(
	game=game,
	nnet=dqn,
	lr=LR)

num_improvements = 0
for _ in range(NUM_EPOCHS):
	## Save the current network weights to disk and reload them into pnet,
	## so pnet holds a frozen snapshot of nnet before this epoch's training.
	torch.save(coach.nnet.state_dict(), PATH)
	coach.pnet.load_state_dict(torch.load(PATH))
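
The rest of the epoch loop is truncated here. One note on the snapshot step above: the round trip through the filesystem is not required, since load_state_dict copies values into the receiving module's own parameters. Assuming coach.nnet and coach.pnet share the same architecture (which the existing load already implies), an in-memory alternative is:

## In-memory alternative to the save/load round trip above:
## load_state_dict copies the values into pnet's own tensors,
## so pnet stays independent of nnet after the call.
coach.pnet.load_state_dict(coach.nnet.state_dict())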
Example #3
from game import Checkers
from dqn import DQN

import torch

PATH = "models/30Improve.pt"
dqn = DQN()
dqn.load_state_dict(torch.load(PATH))
dqn.eval()  # inference only: disable dropout/batch-norm training behaviour

game = Checkers()
game.reset()

board = game.board
## Display
print(board)

color = 'red'
while not game.get_game_ended(board, color):
    actions = game.get_possible_actions(board, color)
    state = game.get_state(board, color)
    state_actions = game.make_inputs(state, actions)
    if color == 'black':
        print("Agent turn")
        print("Agent possible actions")

        out = dqn(state_actions)
        # Print every legal action (flipped back to black's perspective)
        # with its predicted Q-value, then the best value found.
        for a, q in zip(actions, out):
            print(game.flip_action(a), q.item())
        print(torch.max(out))
        # Greedy policy: play the action with the highest Q-value.
        idx = torch.argmax(out)
        action = actions[idx]
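
The human ('red') branch and the board update are cut off on this page. For the agent's pick above, an epsilon-greedy variant of the greedy argmax (the constants in Example #2 include EVAL_EPSILON = 0.01) can be sketched with plain torch; select_action is illustrative, not part of the project:

import random

import torch

def select_action(q_values, actions, epsilon):
    # With probability epsilon pick a random legal action (exploration),
    # otherwise exploit the highest predicted Q-value.
    if random.random() < epsilon:
        return random.choice(actions)
    return actions[torch.argmax(q_values).item()]

# action = select_action(out, actions, epsilon=0.01)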
Example #4
    with tf.Session() as session:
        model1.set_session(session)
        model2.set_session(session)
        session.run(tf.global_variables_initializer())
        GAME.reset()

        wins = 0
        # Warm-up: fill the replay buffer with MIN_EXPERIENCES transitions.
        # The learner plays a random move; a minimax opponent replies.
        for i in range(MIN_EXPERIENCES):
            GAME.available_moves()
            if GAME.win != 0:
                GAME.reset()
            move = random_play(GAME)
            action = encoding_move(move)
            GAME.move(move)
            if GAME.win == 0:
                # Let the minimax opponent answer on a copy of the position.
                new_GAME = Checkers()
                new_GAME.board_state = np.array(GAME.board_state)
                new_GAME.turn = GAME.turn
                new_GAME.moves_queen_with_out_capture = GAME.moves_queen_with_out_capture
                move = min_max.min_max_player(new_GAME, new_GAME.turn)
                GAME.move(move)
            reward = GAME.win  # nonzero only when this exchange ended the game
            experience_replay_buffer.add_experince(action, GAME.board_state,
                                                   reward)

        t0 = datetime.now()
        # Main loop: play num_episodes games, learning as we go.
        for i in range(num_episodes):
            total_t, episode_reward, duration, num_steps_in_episode, time_per_step, epsilon = play_one(
                total_t, experience_replay_buffer, model1, model2, epsilon)
            episode_rewards[i] = episode_reward
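
The class behind experience_replay_buffer is not shown. A minimal sketch of such a fixed-size buffer (hypothetical; it only mirrors the add_experince call used above, misspelling included) could be:

import random
from collections import deque

class ExperienceReplayBuffer:
    # Hypothetical stand-in, not the project's own class.
    def __init__(self, max_size=10000):
        self.buffer = deque(maxlen=max_size)

    def add_experince(self, action, board_state, reward):
        # Same (misspelled) method name as the call above.
        self.buffer.append((action, board_state, reward))

    def sample(self, batch_size):
        # Uniform random minibatch for training.
        return random.sample(self.buffer, batch_size)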
Example #5
    def setUp(self):
        self.test_Game = Checkers()
Example #6
import unittest

import numpy as np

from game import Checkers


class TestBoard(unittest.TestCase):
    def setUp(self):
        self.test_Game = Checkers()

    def test_move(self):
        # Case 1: a plain diagonal step from (5, 0) to (4, 1).
        self.test_Game.move((5, 0, 4, 1))
        print('move')
        expected_result = np.array([[0, -1, 0, -1, 0, -1, 0, -1],
                                    [-1, 0, -1, 0, -1, 0, -1, 0],
                                    [0, -1, 0, -1, 0, -1, 0, -1],
                                    [0, 0, 0, 0, 0, 0, 0, 0],
                                    [0, 1, 0, 0, 0, 0, 0, 0],
                                    [0, 0, 1, 0, 1, 0, 1, 0],
                                    [0, 1, 0, 1, 0, 1, 0, 1],
                                    [1, 0, 1, 0, 1, 0, 1, 0]])

        np.testing.assert_array_equal(expected_result,
                                      self.test_Game.board_state)

        self.test_Game.reset()
        # Case 2: a jump removes the captured piece from the board.
        print("capture")
        self.test_Game.board_state = np.array([[0, -1, 0, -1, 0, -1, 0, -1],
                                               [-1, 0, -1, 0, -1, 0, -1, 0],
                                               [0, -1, 0, 0, 0, -1, 0, -1],
                                               [0, 0, -1, 0, -1, 0, 0, 0],
                                               [0, 1, 0, 0, 0, 0, 0, 0],
                                               [0, 0, 1, 0, 1, 0, 1, 0],
                                               [0, 1, 0, 1, 0, 1, 0, 1],
                                               [1, 0, 1, 0, 1, 0, 1, 0]])

        self.test_Game.move((4, 1, 2, 3))

        expected_result = np.array([[0, -1, 0, -1, 0, -1, 0, -1],
                                    [-1, 0, -1, 0, -1, 0, -1, 0],
                                    [0, -1, 0, 1, 0, -1, 0, -1],
                                    [0, 0, 0, 0, -1, 0, 0, 0],
                                    [0, 0, 0, 0, 0, 0, 0, 0],
                                    [0, 0, 1, 0, 1, 0, 1, 0],
                                    [0, 1, 0, 1, 0, 1, 0, 1],
                                    [1, 0, 1, 0, 1, 0, 1, 0]])

        np.testing.assert_array_equal(expected_result,
                                      self.test_Game.board_state)

        self.test_Game.reset()
        # Case 3: an illegal move is rejected, leaving the board unchanged.
        print("trying no capture")
        self.test_Game.board_state = np.array([[0, -1, 0, -1, 0, -1, 0, -1],
                                               [-1, 0, -1, 0, -1, 0, -1, 0],
                                               [0, -1, 0, 0, 0, -1, 0, -1],
                                               [0, 0, -1, 0, -1, 0, 0, 0],
                                               [0, 1, 0, 0, 0, 0, 0, 0],
                                               [0, 0, 1, 0, 1, 0, 1, 0],
                                               [0, 1, 0, 1, 0, 1, 0, 1],
                                               [1, 0, 1, 0, 1, 0, 1, 0]])

        self.test_Game.move((4, 1, 0, 3))

        expected_result = np.array([[0, -1, 0, -1, 0, -1, 0, -1],
                                    [-1, 0, -1, 0, -1, 0, -1, 0],
                                    [0, -1, 0, 0, 0, -1, 0, -1],
                                    [0, 0, -1, 0, -1, 0, 0, 0],
                                    [0, 1, 0, 0, 0, 0, 0, 0],
                                    [0, 0, 1, 0, 1, 0, 1, 0],
                                    [0, 1, 0, 1, 0, 1, 0, 1],
                                    [1, 0, 1, 0, 1, 0, 1, 0]])

        np.testing.assert_array_equal(expected_result,
                                      self.test_Game.board_state)

        self.test_Game.reset()
        # Case 4: black (turn = -1) captures starting from the first column.
        print("capture from first col")
        self.test_Game.turn = -1
        self.test_Game.board_state = np.array([[0, -1, 0, -1, 0, -1, 0, -1],
                                               [-1, 0, -1, 0, -1, 0, -1, 0],
                                               [0, -1, 0, 0, 0, -1, 0, -1],
                                               [-1, 0, -1, 0, -1, 0, 0, 0],
                                               [0, 1, 0, 0, 0, 0, 0, 0],
                                               [0, 0, 0, 0, 1, 0, 1, 0],
                                               [0, 1, 0, 1, 0, 1, 0, 1],
                                               [1, 0, 1, 0, 1, 0, 1, 0]])

        self.test_Game.move((3, 0, 5, 2))

        expected_result = np.array([[0, -1, 0, -1, 0, -1, 0, -1],
                                    [-1, 0, -1, 0, -1, 0, -1, 0],
                                    [0, -1, 0, 0, 0, -1, 0, -1],
                                    [0, 0, -1, 0, -1, 0, 0, 0],
                                    [0, 0, 0, 0, 0, 0, 0, 0],
                                    [0, 0, -1, 0, 1, 0, 1, 0],
                                    [0, 1, 0, 1, 0, 1, 0, 1],
                                    [1, 0, 1, 0, 1, 0, 1, 0]])

        np.testing.assert_array_equal(expected_result,
                                      self.test_Game.board_state)
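
A natural companion check, assuming reset() restores the standard opening layout implied by the boards above, plus the usual entry point for running the suite (both are illustrative additions, not part of the original file):

    def test_reset(self):
        # After a move, reset() should restore the opening layout.
        self.test_Game.move((5, 0, 4, 1))
        self.test_Game.reset()
        expected_result = np.array([[0, -1, 0, -1, 0, -1, 0, -1],
                                    [-1, 0, -1, 0, -1, 0, -1, 0],
                                    [0, -1, 0, -1, 0, -1, 0, -1],
                                    [0, 0, 0, 0, 0, 0, 0, 0],
                                    [0, 0, 0, 0, 0, 0, 0, 0],
                                    [1, 0, 1, 0, 1, 0, 1, 0],
                                    [0, 1, 0, 1, 0, 1, 0, 1],
                                    [1, 0, 1, 0, 1, 0, 1, 0]])
        np.testing.assert_array_equal(expected_result,
                                      self.test_Game.board_state)


if __name__ == '__main__':
    unittest.main()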