import sys

import numpy as np

from game import Checkers


def min_max_alpha_beta(game, turn, max_depth,
                       alpha=-sys.float_info.max, beta=sys.float_info.max):
    """Minimax search with alpha-beta pruning; returns (score, best_move)."""
    best_score_move = None
    game.available_moves()
    moves = game.moves
    if not moves:
        return 0, None
    for move in moves:
        # Simulate the move on a copy of the position (board copied, as in the
        # training script) so the caller's game is not mutated.
        new_game = Checkers()
        new_game.board_state = np.array(game.board_state)
        new_game.turn = game.turn
        new_game.moves_queen_with_out_capture = game.moves_queen_with_out_capture
        new_game.move(move)
        winner = new_game.win
        if winner != 0:
            return winner * 10000, move
        if max_depth <= 1:
            score = evaluate(new_game)  # static heuristic at the depth cutoff
        else:
            score, _ = min_max_alpha_beta(new_game, -turn, max_depth - 1, alpha, beta)
        if turn > 0:
            # Maximizing player tightens alpha.
            if score > alpha:
                alpha = score
                best_score_move = move
        else:
            # Minimizing player tightens beta.
            if score < beta:
                beta = score
                best_score_move = move
        if alpha >= beta:
            break  # alpha-beta cutoff
    return (alpha if turn > 0 else beta), best_score_move
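# The static heuristic evaluate() used at the depth cutoff is not part of this
# snippet. A minimal material-count sketch, assuming (as in the unit tests) that
# men are encoded as +/-1 and, hypothetically, kings as +/-2; the real encoding
# and weighting may differ.

import numpy as np

def evaluate(game):
    # Hypothetical material heuristic: men count 1, kings count 3;
    # positive scores favour the +1 (maximizing) player.
    board = np.asarray(game.board_state)
    men = np.count_nonzero(board == 1) - np.count_nonzero(board == -1)
    kings = np.count_nonzero(board == 2) - np.count_nonzero(board == -2)
    return float(men + 3 * kings)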
import torch
from tqdm import tqdm

from dqn import DQN
from coach import Coach
from game import Checkers
from arena import Arena

game = Checkers()
dqn = DQN()

NUM_EPOCHS = 80
NUM_TRAIN_GAMES = 200
NUM_EVAL_GAMES = 20
HALF = NUM_EVAL_GAMES // 2
TRAIN_EPSILON = 0.4
GAMMA = 0.8
EVAL_EPSILON = 0.01
LR = 0.0001
PATH = "models/test.pt"

coach = Coach(game=game, nnet=dqn, lr=LR)

num_improvements = 0
for _ in range(NUM_EPOCHS):
    ## Saving a copy of the current network weights to pnet
    torch.save(coach.nnet.state_dict(), PATH)
    coach.pnet.load_state_dict(torch.load(PATH))
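# The epoch body above clones the current network weights into the "previous"
# network pnet by round-tripping through a checkpoint file. The same copy can be
# done in memory; a small self-contained sketch using a toy nn.Linear standing
# in for the actual DQN:

import torch
import torch.nn as nn

nnet = nn.Linear(4, 2)   # stands in for coach.nnet
pnet = nn.Linear(4, 2)   # stands in for coach.pnet

# Copy nnet's weights into pnet (the training script goes through disk instead).
pnet.load_state_dict(nnet.state_dict())

# Both networks now produce identical outputs.
x = torch.randn(1, 4)
assert torch.equal(nnet(x), pnet(x))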
from game import Checkers
from dqn import DQN
import torch

PATH = "models/30Improve.pt"

dqn = DQN()
dqn.load_state_dict(torch.load(PATH))

game = Checkers()
game.reset()
board = game.board

## Display
print(board)

color = 'red'
while not game.get_game_ended(board, color):
    actions = game.get_possible_actions(board, color)
    state = game.get_state(board, color)
    state_actions = game.make_inputs(state, actions)
    if color == 'black':
        print("Agent turn")
        print("Agent possible actions")
        out = dqn(state_actions)
        for a, q in zip(actions, out):
            print(game.flip_action(a), q.item())
        print(torch.max(out))
        idx = torch.argmax(out)
        action = actions[idx]
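# During evaluation the agent above always plays the greedy argmax action; during
# training the coach script presumably mixes in exploration via TRAIN_EPSILON and
# EVAL_EPSILON. A minimal epsilon-greedy helper sketch (the name select_action is
# an assumption and does not appear in the original code):

import random
import torch

def select_action(q_values, actions, epsilon):
    # With probability epsilon pick a random legal action, otherwise the
    # action with the highest predicted Q-value.
    if random.random() < epsilon:
        return random.choice(actions)
    return actions[torch.argmax(q_values).item()]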
with tf.Session() as session:
    model1.set_session(session)
    model2.set_session(session)
    session.run(tf.global_variables_initializer())

    GAME.reset()
    wins = 0

    # Pre-fill the replay buffer: a random move for the learner, answered by
    # the minimax opponent, gives one experience per iteration.
    for i in range(MIN_EXPERIENCES):
        GAME.available_moves()
        if GAME.win != 0:
            GAME.reset()
        move = random_play(GAME)
        action = encoding_move(move)
        GAME.move(move)
        if GAME.win == 0:
            # The minimax opponent replies on a copy of the position.
            new_GAME = Checkers()
            new_GAME.board_state = np.array(GAME.board_state)
            new_GAME.turn = GAME.turn
            new_GAME.moves_queen_with_out_capture = GAME.moves_queen_with_out_capture
            move = min_max.min_max_player(new_GAME, new_GAME.turn)
            GAME.move(move)
        reward = GAME.win
        experience_replay_buffer.add_experince(action, GAME.board_state, reward)

    # Main training loop over episodes.
    t0 = datetime.now()
    for i in range(num_episodes):
        total_t, episode_reward, duration, num_steps_in_episode, time_per_step, epsilon = play_one(
            total_t, experience_replay_buffer, model1, model2, epsilon)
        episode_rewards[i] = episode_reward
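# The experience_replay_buffer object is created elsewhere in the script; only its
# add_experince(action, state, reward) call is visible here. A minimal sketch of a
# buffer with that interface, assuming a fixed capacity and uniform sampling (both
# assumptions, not taken from the original code):

import random
from collections import deque

class ExperienceReplayBuffer:
    def __init__(self, max_size=10000):
        # Oldest experiences are dropped once the buffer is full.
        self.buffer = deque(maxlen=max_size)

    def add_experince(self, action, state, reward):
        self.buffer.append((action, state, reward))

    def sample(self, batch_size):
        # Uniform random minibatch for a training step.
        return random.sample(self.buffer, min(batch_size, len(self.buffer)))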
import unittest

import numpy as np

from game import Checkers


class TestBoard(unittest.TestCase):

    def setUp(self):
        self.test_Game = Checkers()

    def test_move(self):
        # Plain (non-capturing) move.
        self.test_Game.move((5, 0, 4, 1))
        print('move')
        expected_result = np.array([
            [0, -1, 0, -1, 0, -1, 0, -1],
            [-1, 0, -1, 0, -1, 0, -1, 0],
            [0, -1, 0, -1, 0, -1, 0, -1],
            [0, 0, 0, 0, 0, 0, 0, 0],
            [0, 1, 0, 0, 0, 0, 0, 0],
            [0, 0, 1, 0, 1, 0, 1, 0],
            [0, 1, 0, 1, 0, 1, 0, 1],
            [1, 0, 1, 0, 1, 0, 1, 0]])
        np.testing.assert_array_equal(expected_result, self.test_Game.board_state)

        self.test_Game.reset()
        print("capture")
        self.test_Game.board_state = np.array([
            [0, -1, 0, -1, 0, -1, 0, -1],
            [-1, 0, -1, 0, -1, 0, -1, 0],
            [0, -1, 0, 0, 0, -1, 0, -1],
            [0, 0, -1, 0, -1, 0, 0, 0],
            [0, 1, 0, 0, 0, 0, 0, 0],
            [0, 0, 1, 0, 1, 0, 1, 0],
            [0, 1, 0, 1, 0, 1, 0, 1],
            [1, 0, 1, 0, 1, 0, 1, 0]])
        self.test_Game.move((4, 1, 2, 3))
        expected_result = np.array([
            [0, -1, 0, -1, 0, -1, 0, -1],
            [-1, 0, -1, 0, -1, 0, -1, 0],
            [0, -1, 0, 1, 0, -1, 0, -1],
            [0, 0, 0, 0, -1, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, 1, 0, 1, 0, 1, 0],
            [0, 1, 0, 1, 0, 1, 0, 1],
            [1, 0, 1, 0, 1, 0, 1, 0]])
        np.testing.assert_array_equal(expected_result, self.test_Game.board_state)

        self.test_Game.reset()
        print("trying no capture")
        # An invalid move should leave the board unchanged.
        self.test_Game.board_state = np.array([
            [0, -1, 0, -1, 0, -1, 0, -1],
            [-1, 0, -1, 0, -1, 0, -1, 0],
            [0, -1, 0, 0, 0, -1, 0, -1],
            [0, 0, -1, 0, -1, 0, 0, 0],
            [0, 1, 0, 0, 0, 0, 0, 0],
            [0, 0, 1, 0, 1, 0, 1, 0],
            [0, 1, 0, 1, 0, 1, 0, 1],
            [1, 0, 1, 0, 1, 0, 1, 0]])
        self.test_Game.move((4, 1, 0, 3))
        expected_result = np.array([
            [0, -1, 0, -1, 0, -1, 0, -1],
            [-1, 0, -1, 0, -1, 0, -1, 0],
            [0, -1, 0, 0, 0, -1, 0, -1],
            [0, 0, -1, 0, -1, 0, 0, 0],
            [0, 1, 0, 0, 0, 0, 0, 0],
            [0, 0, 1, 0, 1, 0, 1, 0],
            [0, 1, 0, 1, 0, 1, 0, 1],
            [1, 0, 1, 0, 1, 0, 1, 0]])
        np.testing.assert_array_equal(expected_result, self.test_Game.board_state)

        self.test_Game.reset()
        print("capture from first col")
        self.test_Game.turn = -1
        self.test_Game.board_state = np.array([
            [0, -1, 0, -1, 0, -1, 0, -1],
            [-1, 0, -1, 0, -1, 0, -1, 0],
            [0, -1, 0, 0, 0, -1, 0, -1],
            [-1, 0, -1, 0, -1, 0, 0, 0],
            [0, 1, 0, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 1, 0, 1, 0],
            [0, 1, 0, 1, 0, 1, 0, 1],
            [1, 0, 1, 0, 1, 0, 1, 0]])
        self.test_Game.move((3, 0, 5, 2))
        expected_result = np.array([
            [0, -1, 0, -1, 0, -1, 0, -1],
            [-1, 0, -1, 0, -1, 0, -1, 0],
            [0, -1, 0, 0, 0, -1, 0, -1],
            [0, 0, -1, 0, -1, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, -1, 0, 1, 0, 1, 0],
            [0, 1, 0, 1, 0, 1, 0, 1],
            [1, 0, 1, 0, 1, 0, 1, 0]])
        np.testing.assert_array_equal(expected_result, self.test_Game.board_state)
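# To run the test case directly, the standard unittest entry point can be
# appended at the end of the file:

if __name__ == '__main__':
    unittest.main()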