def test_if_three_in_a_row_wins(self):
    board = Board()
    tile_type = 1  # White
    board.add_tile(tile_type, row=1, column=1)
    board.add_tile(tile_type, row=1, column=0)
    board.add_tile(tile_type, row=1, column=2)
    self.assertEqual(board.three_in_a_row, True)
def test_a_cell_with_2or3_neighbours_stays_alive_after_tick(self):
    my_board = Board([self.fst_cell, self.snd_cell, self.trd_cell, self.fourth_cell])
    my_board.tick()
    self.assertTrue(my_board.cell_alive_at(6, 6))
    self.assertTrue(my_board.cell_alive_at(5, 6))
    self.assertTrue(my_board.cell_alive_at(5, 7))
    self.assertTrue(my_board.cell_alive_at(6, 7))
def test_combine(self):
    board = Board(4, 4)
    start_single = [2, 2, 0, 0]
    start_double = [2, 2, 4, 4]
    end_single = [4, 0, 0, 0]
    end_double = [4, 0, 8, 0]
    self.assertEqual(end_single, board._Board__combine(start_single))
    self.assertEqual(end_double, board._Board__combine(start_double))
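# The Board implementation itself isn't part of this excerpt; below is a
# minimal sketch of a combine step consistent with the expected values in the
# test above (merge equal neighbours in place, leaving a zero behind). The
# name `_combine` is illustrative, not the actual Board API.
def _combine(row):
    """Merge equal, adjacent non-zero tiles pairwise from the left.

    The merged value stays in the first slot and the second slot becomes 0,
    e.g. [2, 2, 0, 0] -> [4, 0, 0, 0] and [2, 2, 4, 4] -> [4, 0, 8, 0].
    """
    row = list(row)
    i = 0
    while i < len(row) - 1:
        if row[i] != 0 and row[i] == row[i + 1]:
            row[i] *= 2
            row[i + 1] = 0
            i += 2
        else:
            i += 1
    return row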
def ai_move():
    layout = request.args.get('layout')
    layout = json.loads(layout)
    board = Board(setup=layout)
    ai.move(board)
    layout = board.board()
    layout = json.dumps(layout)
    return layout
def test_transpose(self):
    board = Board()
    tile_type = 1  # White
    board.add_tile(tile_type, row=1, column=1)
    board.add_tile(tile_type, row=1, column=0)
    board.add_tile(tile_type, row=1, column=2)
    # board.transpose()
    self.assertEqual(str(board), "")
def test_compress(self):
    board = Board(4, 4)
    start = [2, 0, 4, 0]
    end_backward = [2, 4, 0, 0]
    end_forward = [0, 0, 2, 4]
    self.assertEqual(end_backward, board._Board__compress(start, 'left'))
    self.assertEqual(end_forward, board._Board__compress(start, 'right'))
    self.assertEqual(end_backward, board._Board__compress(start, 'up'))
    self.assertEqual(end_forward, board._Board__compress(start, 'down'))
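# The compress step is also not shown; a minimal sketch consistent with the
# expectations above (slide non-zero tiles to one end, preserving order) is
# given below. The helper name and signature are assumptions, not the real
# Board internals.
def _compress(row, direction):
    """Slide non-zero tiles to one end of the row, keeping their order.

    'left'/'up' pack toward the start; 'right'/'down' pack toward the end,
    e.g. [2, 0, 4, 0] -> [2, 4, 0, 0] or [0, 0, 2, 4].
    """
    tiles = [value for value in row if value != 0]
    padding = [0] * (len(row) - len(tiles))
    if direction in ('left', 'up'):
        return tiles + padding
    return padding + tiles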
def test_set(self):
    board = Board(4, 4)
    end = [
        1, 0, 0, 0,
        0, 0, 0, 0,
        0, 0, 0, 0,
        0, 0, 0, 0
    ]
    board._Board__set(0, 0, 1)
    self.assertEqual(end, board.grid)
class TestBoard(unittest.TestCase):

    def setUp(self):
        self.board = Board(10, 10)

    def test_initialise_collection(self):
        self.board.initialise_collection()
        self.assertEqual(len(self.board.collection), 100)

    def test_get_neighbours(self):
        for i in range(100):
            self.board.collection.append(Cell(False))
def test_board_join(self):
    """When player2 joins the game, the pieces are on the board."""
    player1 = MockPlayer()
    player2 = MockPlayer()
    board1 = Board(player1)
    board1.join(player2)
    self.assertEqual(board1._pieces[(0, 0)], Piece(player1, 7))
class StackingTests(unittest.TestCase):

    def setUp(self):
        self.board = Board()

    def test_stack_one_red_on_one_blue(self):
        red = self.board.get_colour('red')
        blue = self.board.get_colour('blue')
        new_red, new_blue = stack(red, blue)
        self.assertEqual(red.top, new_blue.top)
        self.assertEqual([], new_red)
        self.assertEqual(2, new_blue.height)
def setup_board(players, num_of_nodes):
    Setup().setup_logger()
    nodes = {}
    board = Board(nodes)
    # create empty nodes
    for i in range(0, num_of_nodes):
        nodes[i] = board.Node(i, 0, 1)
    # create connections
    for i in range(1, num_of_nodes):
        board.connect_nodes(i, i - 1)
    board.connect_nodes(0, num_of_nodes - 1)
    for i in range(0, num_of_nodes / 4):
        board.connect_nodes(randint(0, num_of_nodes - 1), randint(0, num_of_nodes - 1))
    # split nodes between players
    nodes_to_split = nodes.values()[:]
    shuffle(nodes_to_split)
    for i in range(0, num_of_nodes):
        player_id = i % len(players)
        nodes_to_split[i].owner = player_id
    # split armies between players
    armies_per_node = 3
    armies_per_player = num_of_nodes * (armies_per_node - 1) / len(players)
    for player in players:
        my_nodes = board.nodes_by_owner(player.id)
        for i in range(0, armies_per_player):
            node = my_nodes[randint(0, len(my_nodes) - 1)]
            node.add_army(1)
    return board
def test_new_tile(self):
    start = [
        0, 0, 0, 0,
        0, 0, 0, 0,
        0, 0, 0, 0,
        0, 0, 0, 0
    ]
    board = Board(4, 4)
    self.assertEqual(start, board.grid)
    board.new_tile()
    self.assertNotEqual(start, board.grid)
def test_traverse(self):
    board = Board()
    paths = board.traverse()
    # horizontals
    for y in range(0, 3):
        self.assertIn([(0, y), (1, y), (2, y)], paths)
    # verticals
    for x in range(0, 3):
        self.assertIn([(x, 0), (x, 1), (x, 2)], paths)
    # \ diagonal
    self.assertIn([(0, 0), (1, 1), (2, 2)], paths)
    # / diagonal
    self.assertIn([(0, 2), (1, 1), (2, 0)], paths)
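# Board.traverse() itself isn't included here; a minimal sketch that produces
# the winning lines the test checks for (rows, columns and both diagonals of a
# 3x3 grid) might look like the following. This is an illustration, not the
# original implementation.
def traverse(size=3):
    """Enumerate every row, column and diagonal as a list of (x, y) tuples."""
    paths = []
    for y in range(size):  # horizontals
        paths.append([(x, y) for x in range(size)])
    for x in range(size):  # verticals
        paths.append([(x, y) for y in range(size)])
    paths.append([(i, i) for i in range(size)])              # "\" diagonal
    paths.append([(i, size - 1 - i) for i in range(size)])   # "/" diagonal
    return paths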
def setUp(self):
    player1 = MockPlayer()
    player2 = MockPlayer()
    board1 = Board(player1)
    board1.join(player2)
    board1._pieces = {
        (0, 0): Piece(player1, 7),
        (8, 8): Piece(player2, 7),
        (0, 3): Hole(player1),
        (8, 3): Hole(player2),
    }
    self.board = board1
    self.player1 = player1
    self.player2 = player2
def test_double_combine(self):
    start = [
        1, 1, 1, 1,
        1, 1, 2, 2,
        2, 2, 1, 1,
        0, 0, 0, 0
    ]
    end = [
        2, 2, 0, 0,
        2, 3, 0, 0,
        3, 2, 0, 0,
        0, 0, 0, 0
    ]
    board = Board(4, 4, start)
    board.move('left')
    self.assertEqual(end, board.grid)
def test_combine_down(self):
    start = [
        0, 0, 1, 1,
        0, 1, 0, 1,
        1, 0, 0, 0,
        1, 1, 1, 0
    ]
    end = [
        0, 0, 0, 0,
        0, 0, 0, 0,
        0, 0, 0, 0,
        2, 2, 2, 2
    ]
    board = Board(4, 4, start)
    board.move('down')
    self.assertEqual(end, board.grid)
def test_combine_left(self):
    start = [
        1, 1, 0, 0,
        1, 0, 1, 0,
        1, 0, 0, 1,
        0, 0, 1, 1
    ]
    end = [
        2, 0, 0, 0,
        2, 0, 0, 0,
        2, 0, 0, 0,
        2, 0, 0, 0
    ]
    board = Board(4, 4, start)
    board.move('left')
    self.assertEqual(end, board.grid)
def __init__(self, w, h, owner):
    self.sid = sha1(str(time.time())).hexdigest()
    self.clients = [owner]
    owner.player = 1
    self.player_counter = 2
    self.state = STATE_JOINING
    self.board = Board(w, h)
    self.turn = owner
    owner.send(Msg('newgame', self.sid, w, h).encode())
    owner.send(Msg('turn', self.turn.player).encode())
def main():
    solver = Solver()
    manual = Player()
    n = 17
    if len(sys.argv) > 1:
        n = int(sys.argv[1])
    # Start the simulator on a circle with n points, with solver as player 0
    # and manual (you) as player 1.
    sim = Simulator(Board.from_circle(n), solver, manual)
    print("Starting simulation on circle with {} points. The computer is "
          "playing first!\n".format(n))
    while sim.step():
        pass
    input()
def start(self, PLAYERS):
    self.board = Board.initial(GUICard)
    # Clear the ingame data.
    self.start_turn_time = time.clock()
    self.set_time = 0
    self.players = {x: [] for x in xrange(PLAYERS)}
    # self.cards_drawn = False
    self.table_clickable = False   # Is it possible to select cards?
    self.draw_END_BUTTON = False   # Draw 'No Sets' and block the game?
    self.player_turn = None        # Whose turn (== player_id or None)
def server():
    import cv2
    s = socket.socket()
    host = socket.gethostname()
    print(host)
    print(socket.gethostbyname(socket.gethostname()))
    host = HOST
    port = PORT
    s.bind((host, port))
    s.listen(5)
    while True:
        print('listening...')
        game = Game(handicap=HANDICAP)
        board_img = game.get_current_board_img()
        cv2.imshow('board_img', board_img)
        param = {'MCTS': False}
        cv2.setMouseCallback('board_img', game.cap_click, param=param)
        cv2.waitKey(33)
        c, addr = s.accept()
        print('Got connection from', addr)
        while True:
            before_len = len(game.boards)
            board_img = game.get_current_board_img(last_move=game.current_moves[-1])
            cv2.imshow('board_img', board_img)
            cv2.waitKey(33)
            now_len = len(game.boards)
            if now_len > before_len:
                print(param['MCTS'])
                board_img = game.get_current_board_img(last_move=game.current_moves[-1])
                cv2.imshow('board_img', board_img)
                cv2.waitKey(33)
                latest_board = game.boards[-2]  # board before human move
                next_to_play = game.next_to_play
                board_str = Board.mtx2str(latest_board.board_mtx)
                next_to_play = str(next_to_play)
                print('next_to_play:', next_to_play)
                c.send(str.encode(str(game.current_moves[-1]) + '|' + next_to_play + '|' +
                                  board_str + '|' + str(int(param['MCTS']))))
                print(str(game.current_moves[-1]))
                move = c.recv(1024).decode('utf-8')
                print('move', move)
                temp = move.split(',')
                x, y = int(temp[0][1:]), int(temp[1][1:-1])
                print(x, y, game.next_to_play)
                game.mk_move(x, y)
        c.close()
def main():
    p1 = Player(name="John")
    p2 = Player(name="Hulio")
    b = Board()
    b.add_player(p1)
    b.add_player(p2)
    b.start()
    _pprint(filled_cells(b.state))
    raw_input()
    destruct_status = False
    prev_check = []
    state = b.state
    promotions = 0
    while state not in ("FIN", ):
        # for player in (p1, p2):
        #     b.get_state(player)
        event = smart_event_factory(state, colors, destruct_status, prev_check)
        color = 'white' if 'w' == event["name"][0] else 'black'
        if check_the_turn(event, state) == False:
            continue
        else:
            print "VALID"
        if "promoted" in event.keys() and event["promoted"]:
            promotions += 1
            print "PROMOTED CHECKS: ", promotions
        state = check_the_game_state(state, event, colors)
        if event['cutDown'] and destruction_is_possible(event['finalPos'],
                                                        filled_cells(state, color=colors[1]),
                                                        filled_cells(state, color=colors[0]),
                                                        event.get('promoted')):
            destruct_status = True
            prev_check = event['finalPos']
            print 'NEXT DESTRUCT'
            raw_input()
        else:
            colors.reverse()
            destruct_status = False
            prev_check = []
            print 'COLORS REVERSED!'
        print 'FILLED CELLS: ', len(filled_cells(state))
        raw_input()
def __init__(self, width, height, players_num, left_player=Soldier, right_player=Soldier):
    """Initializes the game board with width and height and adds players_num
    players on both sides.

    The additional parameters left_player and right_player can be set to
    different subclasses of Player.
    """
    Board.__init__(self)
    self.width = width
    self.height = height
    self.front = [width / 2 for _ in range(height)]
    self.positions = {}
    # place front in positions
    for y, x in enumerate(self.front):
        self.positions[(x, y)] = 'f'
    # place players
    for n in range(players_num):
        player = Soldier(0, n, 'left')
        self.add_player(player, 'left')
        player = Soldier(width - 1, height - 1 - n, 'right')
        self.add_player(player, 'right')
def test_a_cell_without_neighbours_dies_after_tick(self):
    dying_cell = Cell(4, 4)
    my_board = Board([self.fst_cell, self.snd_cell, self.trd_cell, dying_cell])
    my_board.tick()
    self.assertFalse(my_board.cell_alive_at(4, 4))
    self.assertTrue(my_board.cell_alive_at(6, 6))
    self.assertTrue(my_board.cell_alive_at(5, 6))
    self.assertTrue(my_board.cell_alive_at(5, 7))
def test_get_range(self):
    start = [
        1, 2, 3, 4,
        5, 6, 7, 8,
        9, 10, 11, 12,
        13, 14, 15, 16
    ]
    row_first = [1, 2, 3, 4]
    row_last = [13, 14, 15, 16]
    col_first = [1, 5, 9, 13]
    col_last = [4, 8, 12, 16]
    board = Board(4, 4, start)
    self.assertEqual(row_first, board._Board__get_range(0, 'left'))
    self.assertEqual(row_first, board._Board__get_range(0, 'right'))
    self.assertEqual(row_last, board._Board__get_range(3, 'left'))
    self.assertEqual(row_last, board._Board__get_range(3, 'right'))
    self.assertEqual(col_first, board._Board__get_range(0, 'up'))
    self.assertEqual(col_first, board._Board__get_range(0, 'down'))
    self.assertEqual(col_last, board._Board__get_range(3, 'up'))
    self.assertEqual(col_last, board._Board__get_range(3, 'down'))
def py_get_liberty(matrix):
    black_liberty = np.zeros((19, 19, 8), dtype=np.uint8)
    white_liberty = np.zeros((19, 19, 8), dtype=np.uint8)
    visited = {}
    for i in range(19):
        for j in range(19):
            if matrix[i][j] == 1 and (i, j) not in visited:
                groups = Board.get_group(i, j, matrix, visited=visited)
                num_liberty = Board.check_liberty(groups, matrix, cnt=True)
                if num_liberty > 8:
                    num_liberty = 8
                for stone in groups:
                    black_liberty[stone[0]][stone[1]][num_liberty - 1] = 1
            if matrix[i][j] == 2 and (i, j) not in visited:
                groups = Board.get_group(i, j, matrix, visited=visited)
                num_liberty = Board.check_liberty(groups, matrix, cnt=True)
                if num_liberty > 8:
                    num_liberty = 8
                for stone in groups:
                    white_liberty[stone[0]][stone[1]][num_liberty - 1] = 1
    stones = np.concatenate((black_liberty, white_liberty), axis=2)
    return stones
def test_a_died_cell_with_3_alive_neighbours_resurrects(self):
    my_board = Board([self.fst_cell, self.snd_cell, self.trd_cell])
    my_board.tick()
    self.assertTrue(my_board.cell_alive_at(6, 6))
    self.assertTrue(my_board.cell_alive_at(5, 6))
    self.assertTrue(my_board.cell_alive_at(5, 7))
    # Having 3 alive neighbours, the dead cell comes back to life.
    self.assertTrue(my_board.cell_alive_at(6, 7))
def test_a_cell_with_more_than_3_neighbours_dies(self):
    my_board = Board([self.fst_cell, self.snd_cell, self.trd_cell,
                      self.fourth_cell, self.fifth_cell])
    my_board.tick()
    # dying cells: first and fifth.
    self.assertFalse(my_board.cell_alive_at(6, 6))
    self.assertFalse(my_board.cell_alive_at(7, 5))
    self.assertTrue(my_board.cell_alive_at(5, 6))
    self.assertTrue(my_board.cell_alive_at(5, 7))
    self.assertTrue(my_board.cell_alive_at(6, 7))
def check(self):
    in_set = self.board.has_set()
    # 1. Add up to 12 cards
    if len(self.board.table) < TABLE_SIZE:
        self.board = self.board.add_cards()
    # 2. Continue adding cards until there's a set
    elif not in_set and len(self.board.table) < TABLE_LIMIT:
        self.board = self.board.add_cards()
    # 3. Reshuffle if no set and window can't handle more cards
    elif not in_set and len(self.board.table) == TABLE_LIMIT:
        # random.shuffle shuffles in place and returns None,
        # so build the combined list first and shuffle it.
        cards = self.board.deck + self.board.table
        random.shuffle(cards)
        new_deck = cards[TABLE_SIZE:]
        new_table = cards[:TABLE_SIZE]
        self.board = Board(new_deck, new_table)
    # 4. Break if added more than window can handle
    assert len(self.board.table) <= TABLE_LIMIT
def client():
    s = socket.socket()
    host = HOST
    print('connecting to ' + host)
    port = PORT
    s.connect((host, port))
    game_play = GamePlay(
        policy_net_path='../trained_models/policy',
        value_net_path='../trained_models/value')
    game = Game(handicap=HANDICAP)
    while True:
        message = s.recv(4096)
        message = message.decode('utf-8')
        print(type(message), message)
        # board_mtx, next_to_move = message.split('|')
        move, next_to_move, current_board, is_search = message.split('|')
        print(move, next_to_move, current_board, is_search)
        if int(is_search) == 1:
            game_play.mcts.time_limit = 20
        else:
            game_play.mcts.time_limit = 0.5
        while Board.mtx2str(game.boards[-1].board_mtx) != current_board:
            print('roll_back')
            game.roll_back()
            print(len(game.boards))
        moves = move.split(',')
        x, y = int(moves[0][1:]), int(moves[1][1:-1])
        game.mk_move(x, y)
        output = game_play.play(game)
        game.mk_move(output[0], output[1])
        s.send(bytes(str(output), encoding='utf-8'))
        # s.send(str(output))
    s.close()
from game import Board
import random
import player
import pandas as pd

inp1 = -1

# For simulating data
for _ in range(500):
    out = []
    print("Game")
    print(_)
    win_percent_history = []
    board = Board(False, False)
    board.build_board()
    win_percent_history = player.simulate_game(board, -1, 10)
    for sim in win_percent_history:
        for turn in sim:
            for play in turn:
                out.append(play)
    df = pd.DataFrame(out, columns=['player', 'board', 'win_percent'])
    df.to_csv('game_data2.csv', mode='a', index=False)
def __init__(self):
    self.board = Board()
    self.draw()
class Ui: """Command line user interface Manages all the interaction with the user. """ color_dict = {0: "White", 1: "Black"} piece_type_dict = { 0: "\u2654\u265a", 1: "\u2655\u265b", 2: "\u2656\u265c", 3: "\u2657\u265d", 4: "\u2658\u265e", 5: "\u2659\u265f" } x_str2int = { "a": 0, "b": 1, "c": 2, "d": 3, "e": 4, "f": 5, "g": 6, "h": 7 } y_str2int = { "1": 0, "2": 1, "3": 2, "4": 3, "5": 4, "6": 5, "7": 6, "8": 7 } x_int2str = { 0: "a", 1: "b", 2: "c", 3: "d", 4: "e", 5: "f", 6: "g", 7: "h" } y_int2str = { 0: "1", 1: "2", 2: "3", 3: "4", 4: "5", 5: "6", 6: "7", 7: "8" } def __init__(self): self.board = Board() self.draw() def draw(self): """Draws the board configuration in the terminal""" display = " a b c d e f g h \n" + \ "________________________________ \n " # Loop over all x and y indices for j in range(8): display += " " + str(j + 1) + "|" for i in range(8): # Find the piece index for position [i, j] position_ij = Position(i, j) piece = self.board.find_piece(position_ij) if piece.color != None: display += " " + \ Ui.piece_type_dict[piece.type][piece.color] + " " else: # Draw an empty cell display += " - " # New line for different i value display += "|" + str(j + 1) + " \n " display += "_______________________________ \n" + \ " a b c d e f g h \n" self.board_string = display print(display) def turn(self): """"Performs a turn within ui""" selected_piece = self.select_piece() position = self.select_move(selected_piece) self.board.turn(selected_piece, position, self.draw, self.check, self.check_mate) def select_piece(self) -> Piece: """Asks the user to select a piece to make a move with""" question = Ui.color_dict[self.board.turn_color] + \ ", your turn! Please select a piece. \n" piece = Piece(None, None, None, None, None) while piece.color is None or piece.color != self.board.turn_color: coordinate = input(question) position = self.coordinate2position(coordinate) piece = self.board.find_piece(position) question = "No piece of yours at this field, try again! \n" return piece def select_move(self, selected_piece: int) -> Position: """Asks the user where to move the selected piece""" question = "The selected piece can move to " + \ self.moves2text(selected_piece) + "\n" coordinate = input(question) position = self.coordinate2position(coordinate) while not position in selected_piece.moves: question = "Your piece can't move to the selected field, try again! \n" coordinate = input(question) position = self.coordinate2position(coordinate) return position def moves2text(self, selected_piece: Piece) -> str: """Turns a list of positions into a string with coordinates""" text = "" for move in selected_piece.moves: text += self.position2coordinate(move) + ", " return text def coordinate2position(self, coordinate: str) -> Position: """Converts user input to a board position""" x = Ui.x_str2int[coordinate[0]] y = Ui.y_str2int[coordinate[1]] return Position(x, y) def position2coordinate(self, position: Position) -> str: """Converts user a position to a ui coordinate""" return Ui.x_int2str[position.x] + Ui.y_int2str[position.y] def check(self): """Function that notifies players when check""" print('Check!') def check_mate(self): """Function that notifies players when check mate""" print('Check mate! The game is over')
def test_print_action_boom(self):
    string_action = Board().string_action
    self.assertEqual(string_action([BOOM, (0, 0)]), "BOOM at (0, 0).")
    self.assertEqual(string_action([BOOM, (5, 2)]), "BOOM at (5, 2).")
def test_move_row_works(self):
    b = Board(seed=False)
    # Make the top row 2 1 0 1
    b._tiles[0, :] = [2, 1, 0, 1]
    # Move to the left, so look at the row left-to-right
    row_1 = ((0, 0), (0, 1), (0, 2), (0, 3))
    b._move_tile_row(row_1)
    self.assertEqual(b._tiles.take(0), 2)
    self.assertEqual(b._tiles.take(1), 2)
    b._move_tile_row(row_1)
    self.assertEqual(b._tiles.take(0), 4)
    # Make the second row 1 1 0 2
    b._tiles[1, :] = [1, 1, 0, 2]
    row_2 = ((1, 0), (1, 1), (1, 2), (1, 3))
    b._move_tile_row(row_2)
    # Expect 2 2 0 0
    self.assertEqual(b._tiles.take(4), 2)
    self.assertEqual(b._tiles.take(5), 2)
    b._move_tile_row(row_2)
    self.assertEqual(b._tiles.take(4), 4)
    # First column is now 4 4 0 0
    # Move up to merge
    col_1 = ((0, 0), (1, 0), (2, 0), (3, 0))
    b._move_tile_row(col_1)
    self.assertEqual(b._tiles.take(0), 8)
cnt = 0
for board_mtx, move in zip(games[0], games[1]):
    cnt += 1
    if cnt % 200 != 0:
        continue
    mtx = board_mtx
    tic = time.time()
    for i in range(20):
        py_ret_mtx = py_get_liberty(mtx)
    toc = time.time()
    print(toc - tic)
    tic = time.time()
    for i in range(20):
        string = Board.mtx2str(mtx)
        string = gofeat.get_liberty(string)
        ret_mtx = np.fromstring(string, sep=' ', dtype=np.int).reshape(16, 19, 19).transpose(1, 2, 0)
    toc = time.time()
    print(toc - tic)
    print(np.sum(py_ret_mtx - ret_mtx))
    print(ret_mtx.shape)
    # for i in range(16):
    #     print('num', i + 1)
    #     li_b = Board(board_mtx=ret_mtx[i, :, :])
    #     li_canvas = li_b.visualize_board(grid_size=35)
    # board = Board(board_mtx=board_mtx)
    # canvas = board.visualize_board(grid_size=35)
def run(width, agent1, agent2, file1, file2, start, rounds):
    n = 5
    height = width
    try:
        board = Board(width=width, height=height, n_in_row=n)
        game = Game(board)

        # ############### human VS AI ###################
        def parse_agent(agent_type, filename):
            if agent_type == 'mcts_a0':
                model_file = 'best_policy_8_8_5.model'
                if filename:
                    model_file = filename
                # load the trained policy_value_net in either Theano/Lasagne, PyTorch or TensorFlow
                # best_policy = PolicyValueNet(width, height, model_file=model_file)
                # mcts_player = MCTSPlayer(best_policy.policy_value_fn, c_puct=5, n_playout=400)

                # load the provided model (trained in Theano/Lasagne) into a MCTS player written in pure numpy
                try:
                    policy_param = pickle.load(open(model_file, 'rb'))
                except:
                    policy_param = pickle.load(
                        open(model_file, 'rb'), encoding='bytes')  # To support python3
                best_policy = PolicyValueNetNumpy(width, height, policy_param)
                player = MCTSPlayer(
                    best_policy.policy_value_fn,
                    c_puct=5,
                    n_playout=400)  # set larger n_playout for better performance
            elif agent_type == 'mcts_pure':
                player = MCTS_Pure(c_puct=5, n_playout=1000)
            elif agent_type == 'minmax':
                player = Minimax()
            elif agent_type == 'dqn':
                model_file = 'output/v_1/epoch_100/agent_2.pkl'
                if filename:
                    model_file = filename
                player = DQNPlayer(model_file)
            elif agent_type == 'human':
                player = Human()
            else:
                player = Human()
                print('Illegal Agent Type. Defaulting to human player.')
            return player

        player1 = parse_agent(agent1, file1)
        player2 = parse_agent(agent2, file2)

        winners = []
        for i in range(rounds):
            winner = game.start_play(player1, player2, start_player=start - 1, is_shown=1)
            winners.append(winner)
        winrate1 = winners.count(1) / rounds
        winrate2 = winners.count(2) / rounds
        print('Winners: ' + ','.join([str(w) for w in winners]))
        print(str(agent1) + ' 1' + ' win rate: ' + str(winrate1))
        print(str(agent2) + ' 2' + ' win rate: ' + str(winrate2))
    except KeyboardInterrupt:
        print('\n\rquit')
class Gui(): """Grafical user interface for playing chess""" font = 'Courier 20' color_dict = {0: '#b0b0b0', 1: '#ffffff'} piece_type_dict = { 0: { 0: '\u2654', 1: '\u265a' }, 1: { 0: '\u2655', 1: '\u265b' }, 2: { 0: '\u2656', 1: '\u265c' }, 3: { 0: '\u2657', 1: '\u265d' }, 4: { 0: '\u2658', 1: '\u265e' }, 5: { 0: '\u2659', 1: '\u265f' } } turn_color_dict = {0: 'White', 1: 'Black'} def __init__(self): # Init board self.board = Board() # Init root self.root = tk.Tk() # Create general structure self.board_frame = tk.Frame(self.root) self.board_frame.pack() self.test_frame = tk.Label(self.root, text='Welcome', font=Gui.font) self.test_frame.pack() self.user_input = tk.Entry(self.root, font=Gui.font) self.user_input.configure(state='readonly') self.user_input.pack() self.user_input_given = tk.IntVar(master=self.user_input, name='piece_type', value=-1) # Create buttons/fields self.buttons = [[], [], [], [], [], [], [], []] self.fields = [[], [], [], [], [], [], [], []] for x, y in product(range(8), range(8)): field_color = (x + y) % 2 self.fields[x].append( tk.Frame(self.board_frame, height=50, width=50, background=Gui.color_dict[field_color])) self.fields[x][y].propagate(False) self.fields[x][y].grid(column=x, row=8 - y) self.buttons[x].append( tk.Button(self.fields[x][y], background=Gui.color_dict[field_color], activebackground='#f2ff00', borderwidth=0, font='Courier 30')) self.buttons[x][y].pack(fill='both', expand=True) self.draw() self.select_piece() self.root.mainloop() def ask_promotion_type(self): """Asks the user which piece to promote""" self.user_input.bind('<Return>', self.promote2input) self.test_frame.configure(text='Promote to type:') self.user_input.configure(state='normal') self.reset_buttons() self.user_input_given.set(-1) self.user_input.wait_variable(name='piece_type') user_input = self.user_input_given.get() self.user_input.delete(0, len(self.user_input.get())) self.user_input.configure(state='readonly') return user_input def promote2input(self, event) -> int: """Gets the entered text from the entry box""" type_dict = { 'king': 0, 'queen': 1, 'rook': 2, 'bishop': 3, 'knight': 4, 'pawn': 5 } promotion_type = type_dict[self.user_input.get()] # self.board.promote(piece, promotion_type) self.user_input.bind('<Return>') self.user_input_given.set(promotion_type) def select_piece(self): """Select piece to move""" color = self.board.turn_color for x, rows in enumerate(self.buttons): for y, button in enumerate(rows): piece = self.board.find_piece(Position(x, y)) if piece.color == color and \ piece.moves != [] and \ piece.moves != None: func = partial(self.show_moves, piece) button.configure(command=func) def show_moves(self, piece: Piece): """Marks the fields where the selected piece can move to""" self.reset_buttons() for move in piece.moves: self.buttons[move.x][move.y].configure(background='#f2ff00', command=partial( self.select_move, piece, move)) def select_move(self, piece: Piece, position): """Runs when player selects where to move to""" self.reset_buttons() self.board.recalculate(piece, position, self.ask_promotion_type) self.board.delete_self_check() self.board.turn_counter += 1 self.board.turn_color = int(not self.board.turn_color) self.draw() if self.board.check == True: self.board.check_mate = \ self.board.ischeckmate(self.board.turn_color) if self.board.check_mate == True: self.test_frame.configure(text='Check mate!') else: self.test_frame.configure(text='Check!') else: message = Gui.turn_color_dict[self.board.turn_color] + \ ', it\'s your turn' 
self.test_frame.configure(text=message) self.select_piece() def draw(self): """Draws pieces on the board""" for (x, y) in product(range(8), range(8)): piece = self.board.find_piece(Position(x, y)) if piece.color != None: self.buttons[x][y].config( text=Gui.piece_type_dict[piece.type][piece.color]) else: self.buttons[x][y].config(text='') def reset_buttons(self): """Resets the buttons colors and commands""" for x, y in product(range(8), range(8)): button = self.buttons[x][y] button.configure(command=False, background=Gui.color_dict[(x + y) % 2])
import os

from game import Board


def bad_coordinates():
    os.system("say 'bad coordinates'")
    print("bad coordinates")


board = Board(4, 5, 6)
while not board.game_over:
    try:
        os.system("say 'enter x coordinate'")
        x = int(input("enter x coordinate: "))
        os.system("say 'enter y coordinate'")
        y = int(input("enter y coordinate: "))
    except:
        bad_coordinates()
        continue  # skip the guess if the input wasn't a number
    if x >= 0 and x < board.width and y >= 0 and y < board.height:
        board.guess_square(x, y)
    else:
        bad_coordinates()
class TestCases(unittest.TestCase): def setUp(self): self.test_board1 = Board() # The 11 white stack can't move, extra 1 white at (0, 0) self.test_board2 = Board({ 'white': [[11, 4, 4], [1, 0, 0]], 'black': [[1, 0, 4], [1, 1, 4], [1, 2, 4], [1, 3, 4], [1, 5, 4], [1, 6, 4], [1, 7, 4], [1, 4, 0], [1, 4, 1], [1, 4, 2], [1, 4, 3], [1, 4, 5], [1, 4, 6], [1, 4, 7]] }) # White stack in corner that can move self.test_board3 = Board({ 'white': [[4, 7, 7]], 'black': [[3, 7, 6], [3, 6, 7], [2, 7, 5], [2, 5, 7], [1, 7, 4], [1, 4, 7]] }) # Board full of white pieces self.test_board4 = Board({ 'white': [[1, x, y] for x in range(8) for y in range(8)], 'black': [] }) def test_board_initilisation(self): self.assertEqual(self.test_board1.board, [[0] * 8] * 8) self.assertEqual( self.test_board2.board, [[1, 0, 0, 0, -1, 0, 0, 0], [0, 0, 0, 0, -1, 0, 0, 0], [0, 0, 0, 0, -1, 0, 0, 0], [0, 0, 0, 0, -1, 0, 0, 0], [-1, -1, -1, -1, 11, -1, -1, -1], [0, 0, 0, 0, -1, 0, 0, 0], [0, 0, 0, 0, -1, 0, 0, 0], [0, 0, 0, 0, -1, 0, 0, 0]]) self.assertEqual(self.test_board2.n_white, 12) self.assertEqual(self.test_board2.n_black, 14) def test_get_all_white_actions(self): self.assertEqual( self.test_board1.get_all_white_actions(self.test_board1.board), []) self.assertEqual( self.test_board2.get_all_white_actions(self.test_board2.board), [[BOOM, (0, 0)], [1, (1, 0, 0, 1, 0)], [1, (1, 0, 0, 0, 1)], [BOOM, (4, 4)]]) self.assertEqual( self.test_board3.get_all_white_actions(self.test_board3.board), [[0, (7, 7)], [1, (1, 7, 7, 3, 7)], [1, (1, 7, 7, 7, 3)], [1, (2, 7, 7, 3, 7)], [1, (2, 7, 7, 7, 3)], [1, (3, 7, 7, 3, 7)], [1, (3, 7, 7, 7, 3)], [1, (4, 7, 7, 3, 7)], [1, (4, 7, 7, 7, 3)]]) def test_get_actions(self): self.assertEqual( self.test_board3.get_actions(self.test_board3.board, 7, 5), [[0, (7, 5)], [1, (1, 7, 5, 6, 5)], [1, (1, 7, 5, 5, 5)], [1, (1, 7, 5, 7, 6)], [1, (1, 7, 5, 7, 4)], [1, (1, 7, 5, 7, 3)], [1, (2, 7, 5, 6, 5)], [1, (2, 7, 5, 5, 5)], [1, (2, 7, 5, 7, 6)], [1, (2, 7, 5, 7, 4)], [1, (2, 7, 5, 7, 3)]]) def test_explode(self): self.assertEqual( self.test_board4.explode(self.test_board4.board, 0, 0), ([[0] * 8] * 8, 64)) self.assertEqual( self.test_board2.explode(self.test_board2.board, 7, 4), ([[1] + [0] * 7] + [[0] * 8] * 7, 25)) def test_move(self): self.assertEqual( self.test_board3.move(self.test_board3.board, 1, 6, 7, 6, 5), [[0] * 8] * 4 + [[0] * 7 + [-1], [0] * 7 + [-2], [0] * 5 + [-1, 0, -2], [0] * 4 + [-1, -2, -3, 4]]) self.assertEqual( self.test_board3.move(self.test_board3.board, 3, 6, 7, 4, 7), [[0] * 8] * 4 + [[0] * 7 + [-4], [0] * 7 + [-2], [0] * 8, [0] * 4 + [-1, -2, -3, 4]]) def test_print_action_boom(self): string_action = Board().string_action self.assertEqual(string_action([BOOM, (0, 0)]), "BOOM at (0, 0).") self.assertEqual(string_action([BOOM, (5, 2)]), "BOOM at (5, 2).") def test_print_action_move(self): string_action = Board().string_action self.assertEqual(string_action([MOVE, (1, 0, 0, 0, 1)]), "MOVE 1 from (0, 0) to (0, 1).") self.assertEqual(string_action([MOVE, (5, 2, 3, 7, 3)]), "MOVE 5 from (2, 3) to (7, 3).")
def cycle():
    startboard = np.array([[4] * 6, [0] * 2, [4] * 6])  # starting board
    b = Board(startboard)
    c = 1
    player = 0
    empty = [0] * 6
    moves = 0
    # while at least one side of the board has stones in it
    while b.board[0] != empty and b.board[2] != empty:
        pos = r.randint(1, 6)
        if c % 2 == 1:
            player = 1
        elif c % 2 == 0:
            player = 2
        if player == 1:
            b.movep1(pos - 1)
            moves += 1
            while b.goagainp1:
                b.movep1(pos - 1)
                moves += 1
        elif player == 2:
            b.movep2(pos - 1)
            moves += 1
            while b.goagainp2:
                b.movep2(pos - 1)
                moves += 1
        c += 1
    if b.board[0] == empty:
        b.board[1][1] += sum(b.board[2])
        b.board[2] = empty
    elif b.board[2] == empty:
        b.board[1][0] += sum(b.board[0])
        b.board[0] = empty
    score = '{}-{}'.format(max(b.board[1]), min(b.board[1]))
    if b.board[1].index(max(b.board[1])) == 0 and not (max(b.board[1]) == min(b.board[1])):
        winner = 2
    elif b.board[1].index(max(b.board[1])) == 1 and not (max(b.board[1]) == min(b.board[1])):
        winner = 1
    else:
        winner = 0
    '''
    print(b)
    if winner == 0:
        print("It's a draw! Final score " + score)
    else:
        print("Player {} won! Final score ".format(winner) + score)
    '''
    return winner, moves
def __init__(self, init_model=None): # params of the board and the game self.board_width = 15 self.board_height = 15 self.n_in_row = 5 self.board = Board(width=self.board_width, height=self.board_height, n_in_row=self.n_in_row) self.game = Game(self.board) self.manual = Manual(self.board) # training params self.learn_rate = 1e-3 self.lr_multiplier = 1.0 # adaptively adjust the learning rate based on KL self.temp = 1.0 # the temperature param self.n_playout = 100 # num of simulations for each move self.c_puct = 1 self.buffer_size = 100000 self.batch_size = 512 # mini-batch size for training self.data_buffer = deque(maxlen=self.buffer_size) self.play_batch_size = 1 self.epochs = 5 # num of train_steps for each update self.episode_len = 0 self.kl_targ = 0.02 self.check_freq = 1 self.game_batch_num = 5 self.best_win_ratio = 0.55 # num of simulations used for the pure mcts, which is used as # the opponent to evaluate the trained policy self.pure_mcts_playout_num = 1000 self.lock = threading.Lock() if init_model: # start training from an initial policy-value net self.g1 = tf.Graph() with self.g1.as_default(): self.policy_value_net = PolicyValueNet(self.board_width, self.board_height, model_file=init_model, graph=self.g1, output='/data/data/') # tf.reset_default_graph() self.g2 = tf.Graph() with self.g2.as_default(): self.policy_value_net_train = PolicyValueNet(self.board_width, self.board_height, model_file=init_model, graph=self.g2, output='/data/output/') else: # start training from a new policy-value net self.g1 = tf.Graph() with self.g1.as_default(): self.policy_value_net = PolicyValueNet(self.board_width, self.board_height, graph=self.g1, output='./data/') # tf.reset_default_graph() self.g2 = tf.Graph() with self.g2.as_default(): self.policy_value_net_train = PolicyValueNet(self.board_width, self.board_height, graph=self.g2, output='./output/') self.mcts_player = MCTSPlayer(self.policy_value_net.policy_value_fn, c_puct=self.c_puct, n_playout=self.n_playout, is_selfplay=1)
class TrainPipeline(): def __init__(self, init_model=None): # params of the board and the game self.board_width = 15 self.board_height = 15 self.n_in_row = 5 self.board = Board(width=self.board_width, height=self.board_height, n_in_row=self.n_in_row) self.game = Game(self.board) self.manual = Manual(self.board) # training params self.learn_rate = 1e-3 self.lr_multiplier = 1.0 # adaptively adjust the learning rate based on KL self.temp = 1.0 # the temperature param self.n_playout = 100 # num of simulations for each move self.c_puct = 1 self.buffer_size = 100000 self.batch_size = 512 # mini-batch size for training self.data_buffer = deque(maxlen=self.buffer_size) self.play_batch_size = 1 self.epochs = 5 # num of train_steps for each update self.episode_len = 0 self.kl_targ = 0.02 self.check_freq = 1 self.game_batch_num = 5 self.best_win_ratio = 0.55 # num of simulations used for the pure mcts, which is used as # the opponent to evaluate the trained policy self.pure_mcts_playout_num = 1000 self.lock = threading.Lock() if init_model: # start training from an initial policy-value net self.g1 = tf.Graph() with self.g1.as_default(): self.policy_value_net = PolicyValueNet(self.board_width, self.board_height, model_file=init_model, graph=self.g1, output='/data/data/') # tf.reset_default_graph() self.g2 = tf.Graph() with self.g2.as_default(): self.policy_value_net_train = PolicyValueNet(self.board_width, self.board_height, model_file=init_model, graph=self.g2, output='/data/output/') else: # start training from a new policy-value net self.g1 = tf.Graph() with self.g1.as_default(): self.policy_value_net = PolicyValueNet(self.board_width, self.board_height, graph=self.g1, output='./data/') # tf.reset_default_graph() self.g2 = tf.Graph() with self.g2.as_default(): self.policy_value_net_train = PolicyValueNet(self.board_width, self.board_height, graph=self.g2, output='./output/') self.mcts_player = MCTSPlayer(self.policy_value_net.policy_value_fn, c_puct=self.c_puct, n_playout=self.n_playout, is_selfplay=1) def get_equi_data(self, play_data): """augment the data set by rotation and flipping play_data: [(state, mcts_prob, winner_z), ..., ...] 
""" extend_data = [] for state, mcts_porb, winner in play_data: for i in [1, 2, 3, 4]: # rotate counterclockwise equi_state = np.array([np.rot90(s, i) for s in state]) equi_mcts_prob = np.rot90(np.flipud( mcts_porb.reshape(self.board_height, self.board_width)), i) extend_data.append((equi_state, np.flipud(equi_mcts_prob).flatten(), winner)) # flip horizontally equi_state = np.array([np.fliplr(s) for s in equi_state]) equi_mcts_prob = np.fliplr(equi_mcts_prob) extend_data.append((equi_state, np.flipud(equi_mcts_prob).flatten(), winner)) return extend_data def collect_selfplay_data(self, n_games=1): """collect self-play data for training""" for i in range(n_games): # self.lock.acquire() # print("game {}".format(i)) with self.g1.as_default(): '''mcts_player = MCTSPlayer(self.policy_value_net.policy_value_fn, c_puct=self.c_puct, n_playout=self.n_playout, is_selfplay=1) board = Board(width=self.board_width, height=self.board_height, n_in_row=self.n_in_row) game = Game(board)''' winner, play_data = self.game.start_self_play(self.mcts_player, is_shown=0, temp=self.temp) # self.lock.release() play_data = list(play_data)[:] self.episode_len = len(play_data) # augment the data play_data = self.get_equi_data(play_data) self.data_buffer.extend(play_data) # print("self play end...") def collect_manual_data(self, file): winner, play_data = self.manual.read_manual_data(file) # read the chess manual fail if winner == 0: return play_data = list(play_data)[:] self.episode_len = len(play_data) # augment the data play_data = self.get_equi_data(play_data) self.data_buffer.extend(play_data) def collect_test_data(self): self.board.init_board() states, mcts_probs, current_players = [], [], [] move = 128 self.board.do_move(112) states.append(self.board.current_state()) probs = np.zeros(self.board.width * self.board.height) probs[[move]] = 1 mcts_probs.append(probs) current_players.append(self.board.current_player) winners_z = np.array([1]) play_data = zip(states, mcts_probs, winners_z) play_data = list(play_data)[:] self.data_buffer.extend(play_data) def policy_update(self): """update the policy-value net""" mini_batch = random.sample(self.data_buffer, self.batch_size) state_batch = [data[0] for data in mini_batch] mcts_probs_batch = [data[1] for data in mini_batch] winner_batch = [data[2] for data in mini_batch] with self.g2.as_default(): for i in range(self.epochs): loss, entropy = self.policy_value_net_train.train_step( state_batch, mcts_probs_batch, winner_batch, self.learn_rate*self.lr_multiplier) print(( "lr_multiplier:{:.3f}," "loss:{}," "entropy:{}," ).format( self.lr_multiplier, loss, entropy)) return loss, entropy def policy_evaluate(self, n_games=10): """ Evaluate the trained policy by playing against the pure MCTS player Note: this is only for monitoring the progress of training """ print("evaluating...") current_mcts_player = MCTSPlayer(self.policy_value_net_train.policy_value_fn, c_puct=self.c_puct, n_playout=self.pure_mcts_playout_num) best_mcts_player = MCTSPlayer(self.policy_value_net.policy_value_fn, c_puct=self.c_puct, n_playout=self.pure_mcts_playout_num) win_cnt = defaultdict(int) for i in range(n_games): winner = self.game.start_play(current_mcts_player, best_mcts_player, start_player=i % 2, is_shown=0) win_cnt[winner] += 1 win_ratio = 1.0*(win_cnt[1] + 0.5*win_cnt[-1]) / n_games print("num_playouts:{}, win: {}, lose: {}, tie:{}".format( self.pure_mcts_playout_num, win_cnt[1], win_cnt[2], win_cnt[-1])) # save the current_model 
self.policy_value_net_train.save_model('/data/output/current_policy.model') if win_ratio > self.best_win_ratio: print("New best policy!!!!!!!!") # update the best_policy self.policy_value_net_train.save_model('/data/output/best_policy.model') self.g1 = tf.Graph() with self.g1.as_default(): self.policy_value_net = PolicyValueNet(self.board_width, self.board_height, model_file='/data/output/best_policy.model', graph=self.g1, output='/data/data/') return win_ratio def run(self): """run the training pipeline""" try: '''coord = tf.train.Coordinator() self_play = [threading.Thread(target=self.collect_selfplay_data, args=(self.play_batch_size,)) for i in range(4)] for sp in self_play: sp.start() coord.join(self_play) while len(self.data_buffer) < self.batch_size: print(len(self.data_buffer)) time.sleep(3) pass''' multiplier = [0.1, 0.1, 0.01, 0.01, 0.01] step = 0 for n in range(self.game_batch_num): self.collect_selfplay_data(self.play_batch_size) # self.collect_test_data() self.policy_value_net.n_step += 1 print("batch i:{}, episode_len:{}".format( self.policy_value_net.n_step, self.episode_len)) # optimisation if len(self.data_buffer) > self.batch_size: for i in range(100): self.policy_update() # evaluation if self.policy_value_net.n_step % self.check_freq == 0: # self.lr_multiplier = multiplier[step] # step += 1 self.mcts_player.mcts._discount = 1 - 0.98*(1 - self.mcts_player.mcts._discount) print("current self-play batch: {}, discount: {}".format( self.policy_value_net.n_step, self.mcts_player.mcts._discount)) # self.lock.acquire() self.policy_evaluate(n_games=15) # self.lock.release() except KeyboardInterrupt: print('\n\rquit')
def game(): # For screen-casting if RECORD_SCREEN: recorder = ScreenRecorder(WIDTH, HEIGHT, FPS, out_file=OUTPUT_FILE) run = True board = Board.from_start_position() solver = AutoSolver(board) selected_piece = None # A surface to draw the board onto.. board_surf = pygame.Surface(BOARD_SIZE) loader = Loader() def draw(): board_color = (205, 127, 50) text_background = (0, 100, 255) text_color = (255, 255, 255) # Fill the window and the board win.fill(darken_color(board_color, 0.5)) board_surf.fill(board_color) # Draw the title label onto the window pygame.draw.rect(win, text_background, (TITLE_OFFSETS, TITLE_SIZE)) title_label = main_font.render(f"KLOTSKI PUZZLE", 1, text_color) win.blit(title_label, (TITLE_OFFSETS[0] + TITLE_SIZE[0] // 2 - title_label.get_width() // 2, TITLE_OFFSETS[1] + TITLE_SIZE[1] // 2 - title_label.get_height() // 2)) # Draw the steps label onto the window pygame.draw.rect(win, text_background, (SCORE_OFFSETS, SCORE_SIZE)) steps_label = main_font.render(f"Step {board.number_of_steps}", 1, text_color) win.blit(steps_label, (SCORE_OFFSETS[0] + SCORE_SIZE[0] // 2 - steps_label.get_width() // 2, SCORE_OFFSETS[1] + SCORE_SIZE[1] // 2 - steps_label.get_height() // 2)) # Draw the board and copy it onto the window board.draw(board_surf, TILE_SIZE) win.blit(board_surf, BOARD_OFFSETS) if board.is_solved: # Show the message when game is solved # NOTE: Game does not end when puzzle is solved, user can continue.. success_label = main_font.render(f"Congratulations!", 1, text_color) win.blit(success_label, (BOARD_OFFSETS[0] + BOARD_SIZE[0] // 2 - success_label.get_width() // 2, BOARD_OFFSETS[1] + BOARD_SIZE[1] // 2 - success_label.get_height() // 2)) if solver.loading: # Show a loader when auto-solver is computing the moves. loader.draw( win, pygame.Rect( (WIDTH // 2 - TILE_SIZE // 2, HEIGHT // 2 - TILE_SIZE // 2, TILE_SIZE, TILE_SIZE))) def handle_select(pos): # Handles mouse button down event. # Sets the selected_piece if a piece is selected nonlocal selected_piece selected_piece = None pos = pos[0] - BOARD_OFFSETS[0], pos[1] - BOARD_OFFSETS[1] if 0 <= pos[0] < BOARD_SIZE[0] and 0 <= pos[1] < BOARD_SIZE[1]: position = Position(pos[0] // TILE_SIZE, pos[1] // TILE_SIZE) selected_piece = board.get_piece(position) def handle_drop(pos): # Handles mouse button up event. # Moves the selected_piece if to specified position if allowed. # Specified position must be an empty position! 
nonlocal selected_piece pos = pos[0] - BOARD_OFFSETS[0], pos[1] - BOARD_OFFSETS[1] if 0 <= pos[0] < BOARD_SIZE[0] and 0 <= pos[1] < BOARD_SIZE[1]: click_position = Position(pos[0] // TILE_SIZE, pos[1] // TILE_SIZE) if selected_piece: possible_pos = board.can_move(selected_piece, click_position) if possible_pos: board.move(selected_piece, possible_pos) def reset(): # creates a new board to reset it nonlocal board, selected_piece, solver board = Board.from_start_position() selected_piece = None # Reset the solver as well solver = AutoSolver(board) def handle_user_event(_event): nonlocal selected_piece if _event.type == pygame.KEYDOWN: # Board reset if _event.key == pygame.K_r: reset() # History events if _event.key == pygame.K_LEFT: board.history_back() if _event.key == pygame.K_RIGHT: board.history_forward() # Solver if _event.key == pygame.K_a: # Normal Solver selected_piece = None solver.enable() if _event.key == pygame.K_s: # Fast solver selected_piece = None solver.enable(int(FPS * 0.1)) if _event.type == pygame.MOUSEBUTTONDOWN and _event.button == 1: # left click handle_select(_event.pos) if _event.type == pygame.MOUSEBUTTONUP and _event.button == 1: # left click handle_drop(_event.pos) while run: draw() pygame.display.update() if RECORD_SCREEN: recorder.capture_frame(win) solver.loop() for event in pygame.event.get(): if event.type == pygame.QUIT or \ (event.type == pygame.KEYDOWN and event.key == pygame.K_q): run = False if not solver.enabled: # User inputs taken only when solver not running handle_user_event(event) if not solver.enabled: # Power keys while navigating history # Allows continuous press keys = pygame.key.get_pressed() if keys[pygame.K_DOWN]: board.history_back() elif keys[pygame.K_UP]: board.history_forward() clock.tick(FPS) if RECORD_SCREEN: recorder.stop() pygame.quit()
def __init__(self):
    """
    Initial settings for training.

    Note: kl is used to compute lr (the learning rate).
    """
    # run() -----------------------------------------------------------------------------------
    self.game_batch_num = -1  # number of iterations for one training run; a negative value means no limit
    self.play_batch_size = 1  # number of self-play games per iteration
    self.batch_size = 1024  # amount of data per training step; the policy is updated once data_buffer exceeds this value
    self.check_freq = 50  # play against pure MCTS every (check_freq) training iterations
    self.save_freq = 50  # save the model every (save_freq) training iterations

    # collect_selfplay_data() -----------------------------------------------------------------
    self.buffer_size = 10000
    self.data_buffer = deque(maxlen=self.buffer_size)
    self.kl_targ = 0.02

    # policy_update() -------------------------------------------------------------------------
    self.epochs = 5  # number of epochs per update

    # board -----------------------------------------------------------------------------------
    self.board_width = 9  # board width
    self.board_height = 9  # board height
    self.n_in_row = 5  # how many stones in a row are needed to win
    self.board = Board(width=self.board_width,
                       height=self.board_height,
                       n_in_row=self.n_in_row)
    self.game = Game(self.board)

    # keras -----------------------------------------------------------------------------------
    self.learn_rate = 2e-3
    self.lr_multiplier = 1.0  # adjusted adaptively based on KL
    self.temp = 1.0  # temperature parameter; too small makes training insufficiently exploratory
    file_folder = './n400-o'
    model_tag = '9_9_5_o'
    self.current_model = f'{file_folder}/current_model_{model_tag}.h5'
    self.best_model = f'{file_folder}/best_model_{model_tag}.h5'
    init_model = self.current_model
    self.policy_value_net = PolicyValueNet(self.board_width, self.board_height,
                                           model_file=init_model if os.path.exists(init_model) else None)
    self.progress = file_folder + '/progress.csv'
    self.evaluate_path = file_folder + '/evaluate.csv'
    self.history_path = file_folder + '/history.csv'
    self.history = []

    # MCTS ------------------------------------------------------------------------------------
    self.c_puct = 5  # MCTS exploration preference
    self.loss_goal = 0  #! when the loss at save time is below this value, increase n_playout during training
    self.pure_mcts_playout_num = 1000  # simulations per move for pure MCTS; grows as the model gets stronger
    self.pure_mcts_playout_num_upgrade = 1000  # amount by which the pure MCTS simulation count grows with model strength
    self.best_win_ratio = 0.0
    self.n_playout = 400  # simulations per move for the network player; larger values rely more on MCTS search, smaller on the network's own judgement
    self.n_playout_training = 400
    self.n_playout_growth = 0
    self.n_playout_limit = 2000
    self.MCTS_levelup()
def play():
    board = Board()
    A, B = Player(board, 'white'), Player(board, 'black')
class MctsTest(): def __init__(self, init_model=None): # 棋盘大小 8*8, 5个子连起来 self.board_width = 8 self.board_height = 8 self.n_in_row = 5 # n子相连 self.policy_evaluate_size = 2 # 策略评估胜率时的模拟对局次数 self.batch_size = 1 # data_buffer中对战次数超过n次后开始启动模型训练 self.board = Board(width=self.board_width, height=self.board_height, n_in_row=self.n_in_row) self.game = Game(self.board) # training params self.learn_rate = 2e-3 self.lr_multiplier = 1.0 # 基于KL的自适应学习率 self.temp = 1.0 # the temperature param self.n_playout = 400 # 每个动作的模拟次数 self.c_puct = 5 self.buffer_size = 10000 # cache对战记录个数 self.data_buffer = deque(maxlen=self.buffer_size) # 完整对战历史记录,用于训练 self.epochs = 5 # 每次更新策略价值网络的训练步骤数 self.kl_targ = 0.02 # 策略价值网络KL值目标 self.best_win_ratio = 0.0 # 纯MCT的模拟数,用于评估策略模型 self.pure_mcts_playout_num = 5 self.policy_value_net = PolicyValueNet(self.board_width, self.board_height) # 创建使用策略价值网络来指导树搜索和评估叶节点的MCTS玩家 """self.mcts_player = MCTSPlayer(self.policy_value_net.policy_value_fn, c_puct=self.c_puct, n_playout=self.n_playout, is_selfplay=1)""" def get_equi_data(self, play_data): """ 通过旋转和翻转增加数据集 play_data: [(state, mcts_prob, winner_z), ..., ...] """ extend_data = [] for state, mcts_porb, winner in play_data: for i in [1, 2, 3, 4]: # 逆时针旋转 equi_state = np.array([np.rot90(s, i) for s in state]) equi_mcts_prob = np.rot90( np.flipud( mcts_porb.reshape(self.board_height, self.board_width)), i) extend_data.append( (equi_state, np.flipud(equi_mcts_prob).flatten(), winner)) # 水平翻转 equi_state = np.array([np.fliplr(s) for s in equi_state]) equi_mcts_prob = np.fliplr(equi_mcts_prob) extend_data.append( (equi_state, np.flipud(equi_mcts_prob).flatten(), winner)) return extend_data def policy_update(self): """更新策略价值网络policy-value""" # 随机抽取data_buffer中的对抗数据 mini_batch = random.sample(self.data_buffer, self.batch_size) state_batch = [data[0] for data in mini_batch] mcts_probs_batch = [data[1] for data in mini_batch] winner_batch = [data[2] for data in mini_batch] old_probs, old_v = self.policy_value_net.policy_value(state_batch) # 训练策略价值网络 for i in range(self.epochs): loss, entropy = self.policy_value_net.train_step( state_batch, mcts_probs_batch, winner_batch, self.learn_rate * self.lr_multiplier) new_probs, new_v = self.policy_value_net.policy_value(state_batch) kl = np.mean( np.sum(old_probs * (np.log(old_probs + 1e-10) - np.log(new_probs + 1e-10)), axis=1)) if kl > self.kl_targ * 4: # 如果D_KL跑偏则尽早停止 break # 自动调整学习率 if kl > self.kl_targ * 2 and self.lr_multiplier > 0.1: self.lr_multiplier /= 1.5 elif kl < self.kl_targ / 2 and self.lr_multiplier < 10: self.lr_multiplier *= 1.5 explained_var_old = (1 - np.var(np.array(winner_batch) - old_v.flatten()) / np.var(np.array(winner_batch))) explained_var_new = (1 - np.var(np.array(winner_batch) - new_v.flatten()) / np.var(np.array(winner_batch))) logging.info( ("TEST kl:{:.5f}," "lr_multiplier:{:.3f}," "loss:{}," "entropy:{}," "explained_var_old:{:.3f}," "explained_var_new:{:.3f}").format(kl, self.lr_multiplier, loss, entropy, explained_var_old, explained_var_new)) return loss, entropy def policy_evaluate(self, n_games=10): """ 策略胜率评估:模型与纯MCTS玩家对战n局看胜率 """ # AlphaGo Zero风格的MCTS玩家(使用策略价值网络来指导树搜索和评估叶节点) current_mcts_player = MCTSPlayer(self.policy_value_net.policy_value_fn, c_puct=self.c_puct, n_playout=self.n_playout) # 纯MCTS玩家 pure_mcts_player = MCTSPurePlayer(c_puct=5, n_playout=self.pure_mcts_playout_num) win_cnt = defaultdict(int) for i in range(n_games): # 对战 winner = self.game.start_play(current_mcts_player, pure_mcts_player, start_player=i % 2, is_shown=0) win_cnt[winner] += 1 # 胜率 
win_ratio = 1.0 * (win_cnt[1] + 0.5 * win_cnt[-1]) / n_games logging.info("TEST Num_playouts:{}, win: {}, lose: {}, tie:{}".format( self.pure_mcts_playout_num, win_cnt[1], win_cnt[2], win_cnt[-1])) return win_ratio def run(self): """启动训练""" try: #test # 初始化棋盘 self.board.init_board() print(self.board) print(self.board.current_player) print(self.board.availables) print(self.board.states) print(self.board.last_move) p1, p2 = self.board.players states, mcts_probs, current_players = [], [], [] # 纯MCTS玩家 #player = self.mcts_player player = MCTSPurePlayer(c_puct=5, n_playout=self.pure_mcts_playout_num) print('------get_action------') #move, move_probs = player.get_action(self.board, temp=self.temp, return_prob=1) move = player.get_action(self.board) print(move) """# 保存当前盘面 states.append(self.board.current_state()) current_players.append(self.board.current_player) # 执行落子 print('------do_move------') self.board.do_move(move) self.game.graphic(self.board, p1, p2) # 检查游戏是否结束 print('------check_game_end------') end, winner = self.board.game_end() if end: # 从当前玩家视角确定winner winners_z = np.zeros(len(current_players)) if winner != -1: # 不是和棋 winners_z[np.array(current_players) == winner] = 1.0 # 更新赢家步骤位置=1 winners_z[np.array(current_players) != winner] = -1.0 # 更新输家步骤位置=-1 # 重置MCTS根结点 player.reset_player() if winner != -1: print("Game end. Winner is player:", winner) else: print("Game end. Tie") print(winner, zip(states, mcts_probs, winners_z)) """ """ i=0 # 1.收集自我对抗数据 # 使用MCTS蒙特卡罗树搜索进行自我对抗 winner, play_data = self.game.start_self_play(self.mcts_player, temp=self.temp) play_data = list(play_data)[:] self.episode_len = len(play_data) print(play_data) print(self.episode_len) # 把翻转棋盘数据加到数据集里 play_data = self.get_equi_data(play_data) # 保存对抗数据到data_buffer self.data_buffer.extend(play_data) logging.info("TEST Batch i:{}, episode_len:{}".format(i + 1, self.episode_len)) # 2.使用对抗数据重新训练策略价值网络模型 if len(self.data_buffer) >= self.batch_size: loss, entropy = self.policy_update() # 3.检查一下当前模型胜率 logging.info("TEST Current self-play batch: {}".format(i + 1)) # 策略胜率评估:模型与纯MCTS玩家对战n局看胜率 win_ratio = self.policy_evaluate(self.policy_evaluate_size) self.policy_value_net.save_model(CUR_PATH + '/model/current_test_{}_{}.model'.format(self.board_width, self.board_height)) if win_ratio > self.best_win_ratio: # 胜率超过历史最优模型 logging.info("TEST New best policy!!!!!!!!batch:{} win_ratio:{}->{} pure_mcts_playout_num:{}".format(i + 1, self.best_win_ratio, win_ratio, self.pure_mcts_playout_num)) self.best_win_ratio = win_ratio # 保存当前模型为最优模型best_policy self.policy_value_net.save_model(CUR_PATH + '/model/best_test_{}_{}.model'.format(self.board_width, self.board_height)) # 如果胜率=100%,则增加纯MCT的模拟数 if (self.best_win_ratio == 1.0 and self.pure_mcts_playout_num < 5000): self.pure_mcts_playout_num += 1000 self.best_win_ratio = 0.0 """ except KeyboardInterrupt: logging.info('\n\rquit')
game_input = input("game: ") if(game_input == "n"): game_num += 1 elif(game_input == "exit"): break else: game_num = int(game_input) turn_num = 0 game = all_data[game_num] while(True): turn_input = input("turn: ") if(turn_input == "n" or turn_input == ""): turn_num += 1 elif(turn_input == "b"): break else: turn_num = int(turn_input) states = game["states"] turn = states[turn_num] not_board = Board() not_board.board[1:-1, 1:-1] = idtobits(np.array(turn["board"])) not_board.print_board() print(turn["player_turn"]-1) print(turn["pieces_left"]) #interesting games: #599
def test_new_board_starts_with_two_squares(self):
    b = Board()
    self.assertFalse(b.is_board_full())
    self.assertEqual(len(b.empty_tiles()), len(b._tiles.flatten()) - 2)
def preprocess_board(board_mtx, y, random=True, contain_liberty=False): # rand = np.random.randint(0, 2) # if rand == 1: # board_mtx = board_mtx.T # y['next_to_move'] = (y['next_to_move'][1], y['next_to_move'][0]) # y['current_move'] = (y['current_move'][1], y['current_move'][0]) if random: rand = np.random.randint(0, 8) if rand <= 3: board_mtx = board_mtx.T y['current_move'] = (y['current_move'][1], y['current_move'][0]) y['next_move'] = (y['next_move'][1], y['next_move'][0]) i = rand % 4 if i == 1: board_mtx = np.rot90(board_mtx) y['current_move'] = (18 - y['current_move'][1], y['current_move'][0]) y['next_move'] = (18 - y['next_move'][1], y['next_move'][0]) # print(a[2-idx[1]][idx[0]]) if i == 2: board_mtx = np.rot90(board_mtx) board_mtx = np.rot90(board_mtx) y['current_move'] = (18 - y['current_move'][1], y['current_move'][0]) y['next_move'] = (18 - y['next_move'][1], y['next_move'][0]) y['current_move'] = (18 - y['current_move'][1], y['current_move'][0]) y['next_move'] = (18 - y['next_move'][1], y['next_move'][0]) if i == 3: board_mtx = np.rot90(board_mtx) board_mtx = np.rot90(board_mtx) board_mtx = np.rot90(board_mtx) y['current_move'] = (18 - y['current_move'][1], y['current_move'][0]) y['next_move'] = (18 - y['next_move'][1], y['next_move'][0]) y['current_move'] = (18 - y['current_move'][1], y['current_move'][0]) y['next_move'] = (18 - y['next_move'][1], y['next_move'][0]) y['current_move'] = (18 - y['current_move'][1], y['current_move'][0]) y['next_move'] = (18 - y['next_move'][1], y['next_move'][0]) black_stones = np.zeros((19, 19, 1), dtype=np.uint8) black_stones[board_mtx == 1] = 1 white_stones = np.zeros((19, 19, 1), dtype=np.uint8) white_stones[board_mtx == 2] = 1 if contain_liberty: black_liberty = np.zeros((19, 19, 8), dtype=np.uint8) white_liberty = np.zeros((19, 19, 8), dtype=np.uint8) visited = {} for i in range(19): for j in range(19): if board_mtx[i][j] == 1 and (i, j) not in visited: groups = Board.get_group(i, j, board_mtx, visited=visited) num_liberty = Board.check_liberty(groups, board_mtx, cnt=True) if num_liberty > 8: num_liberty = 8 for stone in groups: black_liberty[stone[0]][stone[1]][num_liberty - 1] = 1 if board_mtx[i][j] == 2 and (i, j) not in visited: groups = Board.get_group(i, j, board_mtx, visited=visited) num_liberty = Board.check_liberty(groups, board_mtx, cnt=True) if num_liberty > 8: num_liberty = 8 for stone in groups: white_liberty[stone[0]][stone[1]][num_liberty - 1] = 1 black_stones = np.concatenate((black_stones, black_liberty), axis=2) white_stones = np.concatenate((white_stones, white_liberty), axis=2) # for i in range(9): # print(board_mtx) # print('liberty:', i) # print(black_stones[:, :, i]) # print('===') # # print('XXXXXX') # print('XXXXXX') # print('XXXXXX') # # for i in range(9): # print(board_mtx) # print('liberty:', i) # print(white_stones[:, :, i]) # print('===') # exit() stones = np.concatenate((black_stones, white_stones), axis=2) ones = np.ones((19, 19, 1), dtype=np.uint8) last_move = np.zeros((19, 19, 1), dtype=np.uint8) if not y['ko_state:']: last_move[y['current_move'][0]][y['current_move'][1]] = 1 else: last_move[y['current_move'][0]][y['current_move'][1]] = -1 is_black_next = np.ones((19, 19, 1), dtype=np.uint8) if y['next_to_play'] == 2: is_black_next -= 1 feat = np.concatenate((stones, last_move, is_black_next, ones), axis=2) return feat
def test_can_make_moves(self):
    b = Board(seed=False)
    # Make sure it's fully empty first
    self.assertEqual(len(b.empty_tiles()), 16)
    # Put a square in the top left corner
    val = 1
    pos = 0
    b._tiles.put(pos, val)
    # Move in the negative direction along the rows (up)
    # Then move in the negative direction along the columns (left)
    # Should do nothing, we are at the edge
    for d in (1, 3):
        b.move(d, suppress_invalid=True)
        self.assertEqual(b._tiles.take(pos), val)
    # Move in the positive direction along the rows (down)
    # This should move three rows
    b.move(0)
    pos += b.length * 3
    self.assertEqual(b._tiles.take(pos), val)
    # At the bottom - movement should do nothing
    b.move(0, suppress_invalid=True)
    self.assertEqual(b._tiles.take(pos), val)
    # Move in the positive direction along the columns (right)
    # This should move three columns
    b.move(2)
    pos += 3
    self.assertEqual(b._tiles.take(pos), val)
    # At the right - movement should do nothing
    b.move(2, suppress_invalid=True)
    self.assertEqual(b._tiles.take(pos), val)
def test_print_action_move(self):
    string_action = Board().string_action
    self.assertEqual(string_action([MOVE, (1, 0, 0, 0, 1)]),
                     "MOVE 1 from (0, 0) to (0, 1).")
    self.assertEqual(string_action([MOVE, (5, 2, 3, 7, 3)]),
                     "MOVE 5 from (2, 3) to (7, 3).")
def test_board():
    print("Board() class tests")
    dimension = int(sys.argv[1])
    board = Board(dimension)
    board_2 = Board(dimension)
    print("Printing the empty board: ")
    print(board)
    board.update_board([0, 2], 'X')
    board.update_board([0, 0], 'O')
    board.update_board([1, 2], 'X')
    board.update_board([2, 2], 'X')
    board.update_board([1, 0], 'X')
    board.update_board([2, 0], 'O')
    board.update_board([0, 1], 'O')
    board.update_board([1, 1], 'X')
    board.update_board([2, 1], 'X')
    if dimension == 4:
        board.update_board([3, 3], 'X')
    print("Printing the board with content: ")
    print(board)
    print(board.is_tateti())
    print(board.get_board())
    print(board.get_id())
    print(board.get_dimension())
    # board_2
    print(board_2)
    print(board_2.is_tateti())
    board_2.update_board([0, 0], 'X')
    print(board_2)
    print(board_2.is_tateti())
def test_player():
    print("Player() class tests")
    dimension = 3
    board_player = Board(dimension)
    print("Printing the empty board: ")
    print(board_player)
    board_player.update_board([0, 2], 'X')
    board_player.update_board([0, 0], 'O')
    board_player.update_board([1, 2], 'X')
    board_player.update_board([2, 2], 'X')
    board_player.update_board([1, 0], 'X')
    board_player.update_board([2, 0], 'O')
    board_player.update_board([0, 1], 'O')
    board_player.update_board([1, 1], 'X')
    # board_player.update_board([2, 1], 'X')
    print(board_player)
    player_1 = Player('Joaquin', 0, 0, 0)
    player_2 = Player('Xano', 1, 1, 1)
    print(player_1)
    print(player_2)
    player_1.movement(board_player)
    print(board_player)
    print(board_player.is_tateti())
def initUI(self):
    self.tup = (None, None)
    self.board = Board()  # board logic class
    self.board.init_board(1)

    palette1 = QPalette()  # set the board background
    palette1.setBrush(self.backgroundRole(),
                      QtGui.QBrush(QtGui.QPixmap('img/linesofaction.png')))
    self.setPalette(palette1)
    # self.setStyleSheet("board-image:url(img/chessboard.jpg)")  # not sure why this doesn't work
    self.setCursor(Qt.PointingHandCursor)  # turn the cursor into a pointing hand
    # self.sound_piece = QSound("sound/luozi.wav")  # load the stone-placing sound effect
    # self.sound_win = QSound("sound/win.wav")  # load the victory sound effect
    # self.sound_defeated = QSound("sound/defeated.wav")  # load the defeat sound effect

    self.resize(WIDTH, HEIGHT)  # fixed size 540*540
    self.setMinimumSize(QtCore.QSize(WIDTH, HEIGHT))
    self.setMaximumSize(QtCore.QSize(WIDTH, HEIGHT))
    self.setWindowTitle("Lines-Of-Action")  # window title
    self.setWindowIcon(QIcon('img/black.png'))  # window icon
    # self.lb1 = QLabel(' ', self)
    # self.lb1.move(20, 10)

    self.black = QPixmap('img/black.png')
    self.white = QPixmap('img/white.png')

    self.piece_now = BLACK  # black moves first
    self.my_turn = True  # the human player moves first
    self.step = 0  # move counter
    self.x, self.y = 1000, 1000

    # self.mouse_point = LaBel(self)  # replace the mouse image with a piece
    # self.mouse_point.setScaledContents(True)
    # self.mouse_point.setPixmap(self.black)  # load the black piece image
    # self.mouse_point.setGeometry(270, 270, PIECE, PIECE)

    self.pieces = [[
        LaBel(self), LaBel(self), LaBel(self), LaBel(self),
        LaBel(self), LaBel(self), LaBel(self), LaBel(self)
    ] for _ in range(8)]  # create piece labels, ready to draw pieces on the board
    # for piece in self.pieces:
    #     piece.setVisible(True)  # make the image visible
    #     piece.setScaledContents(True)  # scale the image with the label size
    for i in range(8):
        for j in range(8):
            self.pieces[i][j].setVisible(True)
            self.pieces[i][j].setScaledContents(True)

    # self.mouse_point.raise_()  # keep the mouse image on top

    # ai_down is True once the AI has finished its move. It mainly acts as a
    # lock: while it is False the AI is still thinking, so the player's mouse
    # clicks are ignored (mousePressEvent does nothing).
    self.ai_down = True
    self.setMouseTracking(True)
    self.DrawPieces()
    self.show()
class App(ConnectionListener):
    def __init__(self, host, port, run_server=False):
        self._running = True
        self._screen = None
        self.reset_sound = None
        self.run_server = run_server
        self.size = self.width, self.height = 1800, 960
        self.board = Board(self)
        self.dice = Dice(self)
        self.init_pieces()
        self.player_count = 0
        self.other_mouse = OtherMouse()
        if self.run_server:
            self.server = BackgammonServer(localaddr=(host, port))
        self.Connect((host, port))

    def init_pieces(self, send=True):
        # Standard backgammon starting layout: each field holds a list of
        # pieces, True for black and False for white.
        self.pieces = list()
        self.fields = [[] for _ in range(24)]
        self.fields[0] = [True] * 2
        self.fields[5] = [False] * 5
        self.fields[7] = [False] * 3
        self.fields[11] = [True] * 5
        self.fields[23] = [False] * 2
        self.fields[18] = [True] * 5
        self.fields[16] = [True] * 3
        self.fields[12] = [False] * 5
        self.piece_size = 42
        self.ping_iter = 0
        ident = 1
        for field_id, field in enumerate(self.fields):
            top = field_id // 12 == 1
            for piece_id, is_black in enumerate(field):
                offset_x = self.board.triangle_width // 2 + \
                    self.board.triangle_width * (field_id % 12) + \
                    ((field_id % 12) // 6) * self.board.offset_x
                x = offset_x if top else self.width - offset_x
                y = self.piece_size * (piece_id * 2 + 1) if top \
                    else self.height - self.piece_size * (piece_id * 2 + 1)
                pos = (x, y)
                self.pieces.append(Piece(self, ident, pos, is_black))
                ident += 1
        self.dice.reset()
        if self.reset_sound is not None:
            self.reset_sound.play()
        if send:
            connection.Send({"action": "resetboard"})

    def send_gamestate(self):
        for p in self.pieces:
            p.send_move()
        self.dice.send_state()
        self.dice.send_eyes()

    def on_init(self):
        pygame.init()
        pygame.mixer.init()
        self.reset_sound = pygame.mixer.Sound('sound/button.wav')
        self.impact_sound = pygame.mixer.Sound('sound/impact.wav')
        self.font = pygame.font.Font(pygame.font.get_default_font(), 22)
        pygame.display.set_caption('Backgammon')
        self.clock = pygame.time.Clock()
        self._screen = pygame.display.set_mode(
            self.size, pygame.HWSURFACE | pygame.DOUBLEBUF)
        self._running = True

    def ping(self):
        connection.Send({"action": "ping"})

    def keep_connection_alive(self):
        # Ping every 4 seconds (240 frames at the 60 FPS tick in on_execute)
        self.ping_iter = (self.ping_iter + 1) % 240
        if self.ping_iter == 0:
            self.ping()

    def on_event(self, event):
        if event.type == pygame.QUIT:
            self._running = False
        elif event.type == pygame.KEYDOWN:
            if event.key == pygame.K_SPACE:
                self.dice.roll()
            elif event.key == pygame.K_ESCAPE:
                self.init_pieces()
        else:
            self.handle_piece_events(event)
        if event.type == pygame.MOUSEMOTION:
            connection.Send({'action': 'mousemotion', 'pos': event.pos})

    def handle_piece_events(self, event):
        for idx, piece in enumerate(self.pieces):
            if piece.handle_event(event):
                if idx == 0:
                    break
                for idx2, piece2 in enumerate(self.pieces):
                    if idx == idx2:
                        continue
                    if piece.rect.colliderect(piece2.rect):
                        break
                else:
                    # No collision: bring the handled piece to the front.
                    self.pieces.insert(0, self.pieces.pop(idx))
                break
        else:
            self.dice.handle_event(event)

    def on_loop(self):
        self.keep_connection_alive()
        connection.Pump()
        self.Pump()
        if self.run_server:
            self.server.Pump()

    def on_render(self):
        self.board.render(self._screen)
        for piece in self.pieces[::-1]:
            piece.update(self._screen)
        self.dice.render(self._screen)
        self.other_mouse.render(self._screen)
        pygame.display.flip()

    def on_cleanup(self):
        pygame.quit()

    def on_execute(self):
        if self.on_init() is False:
            self._running = False
        while self._running:
            self.clock.tick(60)
            for event in pygame.event.get():
                self.on_event(event)
            self.on_loop()
            self.on_render()
        self.on_cleanup()

    def Network_connected(self, data):
        print("Connected to the server")
    def Network_disconnected(self, data):
        print("Disconnected from the server")
        self.player_count = 0

    def Network_resetboard(self, data):
        self.init_pieces(False)

    def Network_roll(self, data):
        self.dice.roll(data)

    def Network_impact(self, data):
        self.impact_sound.play()

    def Network_eyes(self, data):
        self.dice.set_eye_counter(data['eyes'])

    def Network_pong(self, data):
        pass

    def Network_mousemotion(self, data):
        self.other_mouse.setPostion(data['pos'])

    def Network_playercount(self, data):
        new_player_count = int(data['count'])
        if self.run_server and new_player_count > self.player_count:
            self.send_gamestate()
        self.player_count = new_player_count
        if self.player_count < 2:
            self.other_mouse.set_visible(False)

    def Network_move(self, data):
        piece_move = data['piece']
        for piece in self.pieces:
            if piece.ident == piece_move[0]:
                piece.move((piece_move[1], piece_move[2]), self._screen)
                break
        else:
            raise ValueError('Invalid piece ident!')
def main():
    parser = argparse.ArgumentParser(description='Test')
    parser.add_argument('--replay_memory_size', default=50000, type=int,
                        help='replay memory size to store training data')
    parser.add_argument('--batch_size', default=512, type=int,
                        help='batch size')
    parser.add_argument('--learning_rate', default=1e-3, type=float,
                        help='learning rate')
    parser.add_argument('--evaluate_freq', default=50, type=int,
                        help='evaluate once every #evaluate_freq games')
    parser.add_argument(
        '--train_freq', default=1, type=int,
        help='train #train_epoch times over the replay memory within each train step')
    parser.add_argument('--n_eval_game', default=10, type=int,
                        help='number of games during one evaluation')
    parser.add_argument('--n_burn_in', default=10, type=int,
                        help='number of games to burn in the replay memory')
    parser.add_argument('--n_iteration', default=20, type=int,
                        help='number of train iterations')
    parser.add_argument('--width', default=6, type=int)
    parser.add_argument('--height', default=6, type=int)
    parser.add_argument('--n_in_row', default=4, type=int)
    args = parser.parse_args()

    width, height = args.width, args.height
    board = Board(width=width, height=height, n_in_row=args.n_in_row)
    game = Game(board)

    # Prepare train and eval models
    AlphaGoNet_train = PolicyValueNet(width, height)
    # AlphaGoNet_best = PolicyValueNet(width, height)
    # torch.save(AlphaGoNet_train.policy_value_net.state_dict(), 'model/init.mdl')
    AlphaGoNet_train.policy_value_net.load_state_dict(
        torch.load('model/current.mdl'))

    # The replay memory stores training data:
    ReplayMemory = deque(maxlen=args.replay_memory_size)
    player = AlphaGoPlayer(NN_fn=AlphaGoNet_train.policy_value_fn)
    # eval_player = AlphaGoPlayer(NN_fn=AlphaGoNet_best.policy_value_fn)
    eval_player = MCTSPlayer()
    max_win_ratio = .0

    # Burn in
    burn_in(game, player, ReplayMemory, args.n_burn_in)
    for i in range(args.n_iteration):
        print('Iteration NO.:', i)
        train_one_iteration(game, player, ReplayMemory, AlphaGoNet_train,
                            args.batch_size, args.learning_rate,
                            args.train_freq, args.evaluate_freq)
        win_ratio = evaluate(game, player, eval_player, args.n_eval_game)
        if win_ratio > max_win_ratio:
            print('Got a new best model!')
            max_win_ratio = win_ratio
            torch.save(AlphaGoNet_train.policy_value_net.state_dict(),
                       'model/current_best.mdl')
        else:
            print('Save current model')
            torch.save(AlphaGoNet_train.policy_value_net.state_dict(),
                       'model/current.mdl')
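# The helpers burn_in, train_one_iteration and evaluate are not shown in this
# snippet. As an illustration only, evaluate could look roughly like the sketch
# below; the start_play signature and its return convention (1 = first player
# wins, -1 = tie) are assumptions, not taken from the code above.
def evaluate(game, player, eval_player, n_eval_game):
    wins, ties = 0, 0
    for i in range(n_eval_game):
        # Alternate which side moves first to reduce first-move bias.
        winner = game.start_play(player, eval_player,
                                 start_player=i % 2, is_shown=0)
        if winner == 1:
            wins += 1
        elif winner == -1:
            ties += 1
    # Count a tie as half a win.
    return (wins + 0.5 * ties) / n_eval_game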
def __init__(self, init_model=None):
    self.writer = SummaryWriter(WRITER_DIR)

    # params of the board and the game
    self.board_width = 6
    self.board_height = 6
    self.n_in_row = 4
    self.board = Board(width=self.board_width,
                       height=self.board_height,
                       n_in_row=self.n_in_row)
    self.game = Game(self.board)

    # training params
    self.learn_rate = 2e-3
    self.lr_multiplier = 1.0  # adaptively adjust the learning rate based on KL
    self.temp = 1.0  # the temperature param
    self.buffer_size = 10000
    self.batch_size = 512  # mini-batch size for training
    self.data_buffer = deque(maxlen=self.buffer_size)
    self.play_batch_size = 1
    self.epochs = 5  # num of train_steps for each update
    self.kl_targ = 0.02
    self.check_freq = 50
    self.game_batch_num = 5000
    self.improvement_counter = 1000
    self.best_win_ratio = 0.0
    self.input_plains_num = INPUT_PLANES_NUM
    self.c_puct = 5
    self.n_playout = 50  # num of simulations for each move
    self.shutter_threshold_availables = 1
    self.full_boards_selfplay = False

    # num of simulations used for the pure mcts, which is used as
    # the opponent to evaluate the trained policy
    self.pure_mcts_playout_num = 200
    self.pure_mcts_playout_num_step = 200

    if init_model:
        # start training from an initial policy-value net
        self.policy_value_net = PolicyValueNet(
            self.board_width,
            self.board_height,
            self.input_plains_num,
            model_file=init_model,
            shutter_threshold_availables=self.shutter_threshold_availables)
    else:
        # start training from a new policy-value net
        self.policy_value_net = PolicyValueNet(
            self.board_width,
            self.board_height,
            self.input_plains_num,
            shutter_threshold_availables=self.shutter_threshold_availables)

    self.mcts_player = MCTSPlayer(self.policy_value_net.policy_value_fn,
                                  c_puct=self.c_puct,
                                  n_playout=self.n_playout,
                                  is_selfplay=1)
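# lr_multiplier and kl_targ above are only declared here; the update rule that
# uses them lives elsewhere. One common scheme (a sketch, not necessarily the
# one used by this class) adapts the step size after each policy update based
# on the KL divergence between the old and new move distributions:
def adjust_learning_rate(lr_multiplier, kl, kl_targ=0.02):
    # Shrink the step if the policy moved too far, grow it if it barely moved.
    if kl > kl_targ * 2 and lr_multiplier > 0.1:
        lr_multiplier /= 1.5
    elif kl < kl_targ / 2 and lr_multiplier < 10:
        lr_multiplier *= 1.5
    return lr_multiplier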
    best_score = float('+inf')
    best_move = None
    for legal_move in current_board.current_player.calculate_legal_moves():
        new_board = legal_move.execute()
        move_value, move_obj = minimax(new_board, depth - 1, True)
        if move_value < best_score:
            best_score = move_value
            best_move = legal_move
    return best_score, best_move


DEPTH = 2
current_board = Board.create_standard_board()

while True:
    _, white_move = minimax(current_board, DEPTH, True)
    current_board = white_move.execute()
    print(current_board)
    print(_)
    input()

    _, black_move = minimax(current_board, DEPTH, True)
    current_board = black_move.execute()
    for i in current_board.current_player.calculate_legal_moves():
        print(i)
    print(current_board)
    print(_)
    input()
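# The fragment above picks up inside the minimizing branch of a minimax search;
# the function header, the depth-0 base case and the maximizing branch are not
# shown. A sketch of the full shape, where evaluate(board) is a hypothetical
# scoring helper (positive when the position favours the maximizing side):
def minimax(current_board, depth, maximizing):
    if depth == 0:
        return evaluate(current_board), None
    if maximizing:
        best_score, best_move = float('-inf'), None
        for legal_move in current_board.current_player.calculate_legal_moves():
            new_board = legal_move.execute()
            move_value, _ = minimax(new_board, depth - 1, False)
            if move_value > best_score:
                best_score, best_move = move_value, legal_move
        return best_score, best_move
    best_score, best_move = float('+inf'), None
    for legal_move in current_board.current_player.calculate_legal_moves():
        new_board = legal_move.execute()
        move_value, _ = minimax(new_board, depth - 1, True)
        if move_value < best_score:
            best_score, best_move = move_value, legal_move
    return best_score, best_move
# Note that the driver loop above calls minimax(..., True) for both sides; that
# only plays both colours sensibly if the evaluation is expressed from the
# side-to-move's point of view.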
            move = -1
        if move == -1 or move not in board.availables:
            print("invalid move")
            move = self.get_action(board)
        return move

    def __str__(self):
        return "Human {}".format(self.player)


n = 5
width, height = 8, 8
model_file = 'best_policy_8_8_5.model'
board = Board(width=width, height=height, n_in_row=n)
game = Game(board)

# ############### human VS AI ###################
# load the trained policy_value_net in either Theano/Lasagne, PyTorch or TensorFlow
# best_policy = PolicyValueNet(width, height, model_file=model_file)
# mcts_player = MCTSPlayer(best_policy.policy_value_fn, c_puct=5, n_playout=400)

# load the provided model (trained in Theano/Lasagne) into an MCTS player
# written in pure numpy
try:
    policy_param = pickle.load(open(model_file, 'rb'))
except:
    policy_param = pickle.load(open(model_file, 'rb'),
                               encoding='bytes')  # To support python3
# get the policy
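# A possible continuation after loading policy_param (an illustration only):
# PolicyValueNetNumpy and the start_play signature are assumptions that do not
# appear in the fragment above; the MCTSPlayer construction mirrors the
# commented-out line earlier in the snippet.
best_policy = PolicyValueNetNumpy(width, height, policy_param)
mcts_player = MCTSPlayer(best_policy.policy_value_fn,
                         c_puct=5, n_playout=400)  # more playouts -> stronger play
human = Human()
# set start_player=1 to let the AI move first instead of the human
game.start_play(human, mcts_player, start_player=0, is_shown=1)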