def test_check_stalemate(self):
    b = Board()
    self.assertFalse(b.check_stalemate())  # fresh board has empty cells
    b.grid = [
        [2, 2, 2, 2],
        [2, 2, 2, 0],
        [2, 2, 2, 2],
        [2, 2, 2, 2],
    ]
    self.assertFalse(b.check_stalemate())  # one empty cell remains
    b.grid[1][3] = 2
    self.assertFalse(b.check_stalemate())  # full, but all-2s can merge
    b.grid = [
        [2, 4, 2, 4],
        [2, 4, 2, 4],
        [2, 4, 2, 4],
        [2, 4, 2, 4],
    ]
    self.assertFalse(b.check_stalemate())  # vertical merges still possible
    b.grid = [
        [2, 4, 2, 4],
        [4, 2, 4, 2],
        [2, 4, 2, 4],
        [4, 2, 4, 2],
    ]
    self.assertTrue(b.check_stalemate())  # checkerboard: no moves left
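# A minimal sketch of the stalemate rule these cases pin down (board is full
# and no two equal tiles are horizontally or vertically adjacent); the actual
# Board.check_stalemate implementation may differ:
def check_stalemate(self):
    if self.empty_cells():
        return False
    n = len(self.grid)
    for i in range(n):
        for j in range(n):
            if j + 1 < n and self.grid[i][j] == self.grid[i][j + 1]:
                return False  # horizontal merge available
            if i + 1 < n and self.grid[i][j] == self.grid[i + 1][j]:
                return False  # vertical merge available
    return True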
def main():
    # Use AlphaZero self-play for data generation.
    agents_meta = parse_schedule()

    # worker variables of the main process
    board = Board()
    sigexit = Event()
    sigexit.set()  # pre-set the signal so the main-process worker iterates only once

    # subprocess data generator
    helper = DataHelper(data_files=[])
    helper.set_agents_meta(agents_meta=agents_meta)
    generator = helper.generate_batch(TRAINING_CONFIG["batch_size"])

    # start generating
    with h5py.File(f"{DATA_CONFIG['data_path']}/latest.train.hdf5", 'a') as hf:
        for state_batch, value_batch, probs_batch in generator:
            for batch_name in ("state_batch", "value_batch", "probs_batch"):
                batch = locals()[batch_name]  # look up the loop variable by name
                if batch_name not in hf:
                    hf.create_dataset(batch_name, (0, *batch.shape),
                                      maxshape=(None, *batch.shape))
                hf[batch_name].resize(hf[batch_name].shape[0] + 1, axis=0)
                hf[batch_name][-1] = batch
            # Prevent the main process from generating data too quickly:
            # since sigexit has been set, run_proc will iterate only once.
            run_proc(helper.buffer, helper.buffer_size, helper.lock,
                     sigexit, agents_meta, board)
            board.reset()
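# A minimal sketch of reading back the datasets written above, assuming the
# same h5py layout (each dataset stores one batch per row along axis 0); the
# file path here is an assumption:
import h5py

with h5py.File("latest.train.hdf5", "r") as hf:
    states = hf["state_batch"][:]  # (num_batches, batch_size, 6, 15, 15)
    values = hf["value_batch"][:]  # (num_batches, batch_size)
    probs = hf["probs_batch"][:]   # (num_batches, batch_size, board_size)
    print(states.shape, values.shape, probs.shape)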
def __init__(self, model_file=None):
    # Build network architecture
    input_shape = Board().encoded_states().shape  # (6, 15, 15)
    inputs = Input(input_shape)
    shared_net = Sequential([
        *ConvBlock(32, input_shape=input_shape),
        *ConvBlock(64),
        *ConvBlock(128)
    ], "shared_net")
    policy_head = Sequential([
        shared_net,
        *ConvBlock(4, (1, 1), "relu"),
        Flatten(),
        Dense(Game["board_size"], kernel_regularizer=l2()),
        Activation("softmax")
    ], "policy_head")
    value_head = Sequential([
        shared_net,
        *ConvBlock(2, (1, 1), "relu"),
        Flatten(),
        Dense(64, activation="relu", kernel_regularizer=l2()),
        Dense(1, kernel_regularizer=l2()),
        Activation("tanh")
    ], "value_head")
    self.model = Model(inputs, [value_head(inputs), policy_head(inputs)])
    if model_file is not None:
        self.restore_model(model_file)
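# The ConvBlock helper is not shown in this section. A minimal sketch of what
# it presumably returns, given how it is unpacked into Sequential above: a
# list of layers with the signature ConvBlock(filters, kernel_size, activation).
# The exact contents (e.g. whether BatchNormalization is included) and the
# tf.keras import path are assumptions:
from tensorflow.keras.layers import Activation, Conv2D

def ConvBlock(filters, kernel_size=(3, 3), activation="relu", **kwargs):
    # channels-first layout to match the (6, 15, 15) encoded state
    return [
        Conv2D(filters, kernel_size, padding="same",
               data_format="channels_first", **kwargs),
        Activation(activation),
    ]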
def dual_play(agents, board=None, verbose=False, graphic=False):
    """
    Play a game between two players.
    Params:
        agents: { Player.black: agent1, Player.white: agent2 }.
        board: initial board state. The start player is determined here.
        verbose: if True, the return value is in the form of training data.
    Returns:
        if verbose is True:
            [(state_inputs, final_score, action_probs)]
            where each element is a numpy.array.
        else:
            winner
    """
    if board is None:
        board = Board()
    elif board.status["is_end"]:
        board.reset()
    if verbose is True:
        result = []
    else:
        result = Player.none
    while True:
        # set the current agent
        cur_agent = agents[board.status["cur_player"]]
        # evaluate the board state and get the action
        if verbose is True:
            _, action_probs, next_move = cur_agent.eval_state(board)
            result.append([
                board.encoded_states(),
                board.status["cur_player"],
                action_probs
            ])
        else:
            next_move = cur_agent.get_action(board)
        # update the board
        board.apply_move(next_move)
        if graphic:
            print(board)
        # end judgement
        if board.status["is_end"]:
            winner = board.status["winner"]
            if graphic:
                print("Game ends. Winner is {}.".format(winner))
            # format the output result
            if verbose is True:
                result = [(
                    state[0],
                    np.array(Player.calc_score(state[1], winner)),
                    state[2]
                ) for state in result]
            else:
                result = winner
            return result
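# A usage sketch for dual_play; agent_a and agent_b are hypothetical agents
# exposing the eval_state()/get_action() interface used above:
agents = {Player.black: agent_a, Player.white: agent_b}
winner = dual_play(agents, graphic=True)   # plain game, returns the winner
samples = dual_play(agents, verbose=True)  # self-play training data
# each sample is (encoded_states, final_score, action_probs)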
def _reset(self):
    self.board = Board(self.board_shape, max_per_cell=1)
    self.phase = self.game_phases[0]
    self.state = SeegaState(board=self.board,
                            next_player=self.first_player,
                            boring_limit=self.just_stop,
                            game_phase=self.phase)
    self.current_player = self.first_player
def start(width: int, height: int, n: int):
    """
    Start the game.
    width  : width of the board
    height : height of the board
    n      : how many moves the angel considers from its current position
    """
    board = Board(width, height)
    (angel, devil) = create_character(board, n)
    turn = 0
    while not is_terminal(board, angel):
        turn += 1
        angel_position = angel.get_position()
        devil_position = devil.get_position()

        # initial random angel move
        board.fill(angel_position[0], angel_position[1], ANGLE_SPACE)
        # initial random devil move
        board.fill(devil_position[0], devil_position[1], BLOCK_SPACE)
        print("Current board")
        board.pretty_print()
        print("******************")

        # angel's turn
        print("Angel turn")
        board, angel_chosen_move = angel_move(board, angel)
        print("Current board")
        board.pretty_print()
        print("******************")

        # devil's turn
        print("Devil turn")
        board = devil_move(board, angel_chosen_move, devil)

    print(f"Game ends at turn {turn}")
def _reset(self):
    self.done = False
    self.board = Board(self.board_shape)
    self.state = SeegaState(board=self.board,
                            next_player=self.player_color,
                            boring_limit=self.just_stop)
    # self.trace = Trace(
    #     self.state, players={-1: self.players[-1].name, 1: self.players[1].name}
    # )
    self.current_player = self.player_color
    self._fill_board()
def test_spawn_minimum_0(self):
    b = Board(self.seed)
    b.spawn_minimum()
    # deep-copy the rows: a plain [:] slice is shallow, so mutating
    # `expected` would also corrupt self.initial_grid
    expected = [row[:] for row in self.initial_grid]
    expected[1][0] = 2
    self.assertEqual(b.grid, expected)
    b.spawn_minimum()
    expected[3][0] = 2
    self.assertEqual(b.grid, expected)
def test_reversed(self):
    b = Board()
    b.grid = self.grid_1
    expected = [
        [64, 16, 8, 0],
        [32, 4, 0, 0],
        [4, 2, 0, 0],
        [0, 4, 0, 0],
    ]
    self.assertEqual(b.grid, self.grid_1)
    b.reverse()
    self.assertEqual(b.grid, expected)
def test_transpose(self):
    b = Board()
    b.grid = self.grid_1
    expected = [
        [0, 0, 0, 0],
        [8, 0, 0, 0],
        [16, 4, 2, 4],
        [64, 32, 4, 0],
    ]
    self.assertEqual(b.grid, self.grid_1)
    b.transpose()
    self.assertEqual(b.grid, expected)
def __init__(self, model_name=None):
    with tf.variable_scope("Dataset"):
        input_shape = Board().encoded_states().shape  # (6, 15, 15)
        self.iter = tf.data.Iterator.from_structure(
            (tf.float32, tf.float32, tf.float32),
            (tf.TensorShape((None, *input_shape)),
             tf.TensorShape((None, )),
             tf.TensorShape((None, Game["board_size"]))))
        self.mini_batch = self.iter.get_next()
        self.inputs, self.state_value, self.action_probs = self.mini_batch
        inputs_t = tf.transpose(self.inputs, [0, 2, 3, 1])  # channels_last
        self.inputs_t = inputs_t

    with tf.variable_scope("SharedNet"):
        conv_blocks = []
        for i in range(3):
            conv_blocks.append(
                tf.layers.conv2d(
                    inputs=inputs_t if i == 0 else conv_blocks[i - 1],
                    filters=2**(i + 5),  # 32, 64, 128
                    kernel_size=(3, 3),
                    padding="same",
                    activation=tf.nn.relu,
                    name="conv_{}".format(i)))
        shared_output = conv_blocks[-1]

    with tf.variable_scope("PolicyHead"):
        policy_conv = tf.layers.conv2d(inputs=shared_output,
                                       filters=4,
                                       kernel_size=(1, 1),
                                       padding="same",
                                       activation=tf.nn.relu)
        policy_flatten = tf.layers.flatten(policy_conv)
        self.policy_logits = tf.layers.dense(policy_flatten, Game["board_size"])
        self.policy_output = tf.nn.softmax(self.policy_logits, name="policy_output")

    with tf.variable_scope("ValueHead"):
        value_conv = tf.layers.conv2d(inputs=shared_output,
                                      filters=2,
                                      kernel_size=(1, 1),
                                      padding="same",
                                      activation=tf.nn.relu)
        value_flatten = tf.layers.flatten(value_conv)
        value_hidden = tf.layers.dense(value_flatten, 64, tf.nn.relu)
        value_logits = tf.layers.dense(value_hidden, 1)
        self.value_output = tf.reshape(tf.nn.tanh(value_logits), [-1],
                                       name="value_output")

    self.session = tf.Session()
    self.saver = tf.train.Saver()
    self.model_file = self._parse_path(model_name)
    self.initialized = False
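# A minimal sketch of the AlphaZero-style loss such a graph would typically
# train with, using the tensors defined above; the optimizer choice, learning
# rate, and L2 coefficient are assumptions, not taken from this section:
with tf.variable_scope("Loss"):
    value_loss = tf.losses.mean_squared_error(self.state_value, self.value_output)
    policy_loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(
            labels=self.action_probs, logits=self.policy_logits))
    l2_penalty = 1e-4 * tf.add_n(
        [tf.nn.l2_loss(v) for v in tf.trainable_variables()])
    loss = value_loss + policy_loss + l2_penalty
train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)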
def test_left(self):
    b = Board(self.seed)
    b.left()
    expected = [
        [0, 0, 0, 0],
        [2, 0, 0, 0],  # spawn
        [2, 0, 0, 0],
        [0, 0, 0, 0],
    ]
    self.assertEqual(b.grid, expected)

    b = Board(self.seed)
    b.grid = self.grid_2
    b.left()
    expected = [
        [8, 0, 0, 0],
        [4, 2, 0, 0],
        [16, 4, 0, 2],  # spawn
        [8, 0, 0, 0],
    ]
    self.assertEqual(b.grid, expected)
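# A sketch of how Board.left() could compose squash() with a spawn step,
# consistent with the "# spawn" markers in the test above; only squash()
# appears in this section, so the body and the name of the spawn step are
# assumptions:
def left(self):
    self.grid = [self.squash(row) for row in self.grid]
    self.spawn()  # hypothetical: place a new tile after each move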
def test_squash_0(self):
    b = Board()
    arr = [0, 2, 0, 4]
    self.assertEqual(b.squash(arr), [2, 4, 0, 0])
    arr = [8, 8, 8, 8]
    self.assertEqual(b.squash(arr), [16, 16, 0, 0])
    arr = [2, 2, 4, 4]
    self.assertEqual(b.squash(arr), [4, 8, 0, 0])
    arr = [2, 4, 8, 4]
    self.assertEqual(b.squash(arr), [2, 4, 8, 4])
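# A reference sketch of the merge rule these cases pin down (slide left,
# merge equal neighbours once per move, pad with zeros); Board.squash's
# actual implementation may differ:
def squash(row):
    tiles = [x for x in row if x != 0]
    merged, i = [], 0
    while i < len(tiles):
        if i + 1 < len(tiles) and tiles[i] == tiles[i + 1]:
            merged.append(tiles[i] * 2)  # each tile merges at most once
            i += 2
        else:
            merged.append(tiles[i])
            i += 1
    return merged + [0] * (len(row) - len(merged))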
def run_proc(buffer, maxlen, lock, sigexit, agents_meta, board=None):
    """
    Multiprocessing target function.
    """
    if board is None:
        board = Board()
    agents = parse_agents_meta(agents_meta)
    while True:
        data = simulate_game_data(board, agents)
        print(f"Finished one episode with {len(data)} samples.")
        with lock:
            if len(buffer) >= maxlen:
                # trim in place: rebinding `buffer` to a slice would only
                # create a local copy and leave the shared buffer untouched
                del buffer[:len(buffer) - maxlen]
            buffer.extend(data)
        if sigexit.is_set():
            return
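# A sketch of how run_proc might be launched as a background worker, assuming
# a Manager-backed shared buffer; the maxlen value and the wiring of
# agents_meta (from parse_schedule, as in main()) are assumptions:
from multiprocessing import Event, Lock, Manager, Process

manager = Manager()
buffer, lock, sigexit = manager.list(), Lock(), Event()
worker = Process(target=run_proc,
                 args=(buffer, 10000, lock, sigexit, agents_meta))
worker.start()
# ... later, ask the worker to finish its current episode and exit:
sigexit.set()
worker.join()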
def __init__(self, shape, current_player=-1, parent=None):
    super(BoardGUI, self).__init__(parent)
    self.current_player = current_player
    self.color = ["black", "green"]
    self.score = {-1: 0, 1: 0}
    self.shape = shape
    self.setFixedSize(100 * shape[1], 100 * shape[0])
    grid_layout = QGridLayout()
    grid_layout.setSpacing(0)
    self.squares = list()
    self._board = Board(shape)
    for i in range(shape[0]):
        temp = list()
        for j in range(shape[1]):
            square = Square(i, j)
            grid_layout.addWidget(square, shape[0] - i, j)
            temp.append(square)
        self.squares.append(temp)
    self.set_default_colors()
    self.setLayout(grid_layout)
def eval_agents(agents, num_games=9):
    """
    Evaluate the performance of two agents by simulating multiple games.
    Params:
        agents: [agent1, agent2]
        num_games: number of games simulated, defaults to 9 (best-of-nine).
    Returns:
        [win_rate(a) for a in agents]
    """
    print("---------Evaluating agents-------------")
    board = Board()
    players = [Player.black, Player.white]
    win_cnts = np.zeros(2)
    for i in range(num_games):
        winner = dual_play(dict(zip(players, agents)), board)
        try:
            win_idx = players.index(winner)
            win_cnts[win_idx] += 1
            print("Round {} ends, winner is <{}: {}>;".format(
                i + 1, winner, agents[win_idx]))
        except ValueError:  # tie
            win_cnts += 0.5
            print("Round {} ends, tie game;".format(i + 1))
        players.reverse()  # exchange the start player
        board.reset()
        for agent in agents:
            agent.reset()
    win_rates = win_cnts / num_games
    print("Win rate:")
    print("{}: {}".format(agents[0], win_rates[0]))
    print("{}: {}".format(agents[1], win_rates[1]))
    print("---------------------------------------")
    return tuple(win_rates)
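# Usage sketch for eval_agents; agent_a and agent_b are hypothetical agents
# built elsewhere:
rates = eval_agents([agent_a, agent_b], num_games=20)
print(rates)  # ties credit 0.5 to each side, so the two rates sum to 1.0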
def reset_board(self):
    self._board = Board(self.shape)
    for i in range(self.shape[0]):
        for j in range(self.shape[1]):
            self.squares[i][j].remove_piece()
import os
import sys

import pygame


def resource_path(relative_path):
    try:
        # PyInstaller creates a temp folder and stores its path in _MEIPASS
        base_path = sys._MEIPASS
    except Exception:
        base_path = os.path.abspath(".")
    return os.path.join(base_path, relative_path)


if __name__ == "__main__":
    setting = Setting(resource_path("setting.config"))
    screen_resolution = setting.get_screen_resolution()
    grid = setting.get_grid()
    pygame.init()
    game_logic = GameLogic(Board(grid[0], grid[1]), setting.get_n())
    game_logic.init()
    screen = pygame.display.set_mode(screen_resolution)
    cell_size = get_cell_size(screen_resolution[0], screen_resolution[1],
                              game_logic.get_board().get_size())
    game_loop(screen, cell_size, game_logic)
import time

from core import Window, Keyboard, Board, ScoreBoard

window = Window(1200, 800, 'Welcome to Reversi AI',
                'resources/images/background_100x100.png')
keyboard = Keyboard()
board = Board(window, 2, [0], ['Black', 'White'], 8, 8, 1,
              ('resources/images/black_82x82.png',
               'resources/images/white_82x82.png',
               'resources/images/board_82x82_b1.png'),
              'resources/images/cursor_82x82.png')
scoreboard = ScoreBoard(window, 2, board,
                        ('resources/images/black_82x82.png',
                         'resources/images/white_82x82.png',
                         'resources/images/background_100x100.png'))


def main():
    while True:
        if not keyboard.monitor(onkeydown_callback=board.update):
            window.quit()
            exit(0)
        if board.is_locked():
            time.sleep(2)
            board.reset_lock()
            if board.is_ending():
                break
            board.action(callbacks=(scoreboard.update, ))
        if not window.done_background:
            window.draw_background()
        board.draw_self()
        scoreboard.draw_self()
from core import Board, Player, Evaluator


def show_board(board):
    black_stonenum = len([1 for i in board.stones if i == -1])
    white_stonenum = len([1 for i in board.stones if i == 1])
    print("black: {0} vs {1} :white".format(black_stonenum, white_stonenum))
    dispboard = ["2" if stone == -1 else "1" if stone == 1 else " "
                 for stone in board.stones]
    for i in range(0, 8):
        offset = i * 8
        print(dispboard[offset:offset + 8])


if __name__ == '__main__':
    evaluator = Evaluator()
    player = Player(evaluator)
    board = Board()
    while not board.check_gameover():
        mpos = player.move(board, -1)
        show_board(board)
        epos = player.move(board, 1)
        show_board(board)
import readline  # imported for its side effect: line editing in the console
import code

from core import Board

b = Board(seed=0)

variables = globals().copy()
variables.update(locals())
shell = code.InteractiveConsole(variables)
shell.interact()
def test_calc_score(self):
    b = Board()
    b.grid = self.grid_1
    self.assertEqual(b.calc_score(), 134)
def test_spawn_minimum_1(self):
    b = Board(self.seed)
    b.grid = self.grid_stalemate
    b.spawn_minimum()
    self.assertTrue(b.gameover)
def test_init_board(self):
    b = Board(self.seed)
    grid = b.grid
    self.assertEqual(grid, self.initial_grid)
def test_empty_cells_1(self):
    b = Board()
    b.grid = self.grid_1
    empty_cells = b.empty_cells()
    expected = {(0, 0), (1, 0), (1, 1), (2, 0),
                (2, 1), (3, 0), (3, 1), (3, 3)}
    self.assertEqual(empty_cells, expected)
def test_empty_cells_0(self):
    b = Board(self.seed)
    empty_cells = b.empty_cells()
    expected = {(i, j) for i in range(4) for j in range(4)}
    expected.remove((2, 2))
    self.assertEqual(empty_cells, expected)
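# A minimal sketch of the empty_cells() behaviour these tests pin down (the
# set of (row, col) coordinates holding 0); the real method may differ:
def empty_cells(self):
    return {(i, j)
            for i, row in enumerate(self.grid)
            for j, value in enumerate(row)
            if value == 0}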