def test_make_dataset_from_sgf(self):
    with tempfile.NamedTemporaryFile() as sgf_file, \
            tempfile.NamedTemporaryFile() as record_file:
        sgf_file.write(TEST_SGF.encode('utf8'))
        sgf_file.seek(0)
        preprocessing.make_dataset_from_sgf(sgf_file.name, record_file.name)
        recovered_data = self.extract_data(record_file.name)
    start_pos = go.Position()
    first_move = coords.from_sgf('fd')
    next_pos = start_pos.play_move(first_move)
    second_move = coords.from_sgf('cf')
    expected_data = [
        (features.extract_features(start_pos),
         preprocessing._one_hot(coords.to_flat(first_move)), -1),
        (features.extract_features(next_pos),
         preprocessing._one_hot(coords.to_flat(second_move)), -1)]
    self.assertEqualData(expected_data, recovered_data)
def play(model, p_fail_count):
    history = []
    state = go.Position()
    while True:
        if state.is_game_over():
            break
        scores = pv_mcts_scores(model, state, SP_TEMPERATURE, p_fail_count)
        policies = [0] * DN_OUTPUT_SIZE
        for action, policy in zip(get_legal_actions(state.all_legal_moves()),
                                  scores):
            policies[action] = policy
        x = features.extract_features(state, features.AGZ_FEATURES)
        history.append([x, policies, None])
        action = np.random.choice(get_legal_actions(state.all_legal_moves()),
                                  p=scores)
        if action == go.N * go.N:
            pv_mcts_coord = None  # the final flat index is the pass move
        else:
            pv_mcts_coord = (action // go.N, action % go.N)
        state = state.play_move(pv_mcts_coord)
    value = state.result()
    for i in range(len(history)):
        history[i][2] = value
        value = -value
    return history
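# The closing loop of play() back-fills every history record with the game
# outcome, flipping the sign each ply so each record is labeled from the
# mover's perspective. A minimal standalone sketch of that labeling, using a
# hypothetical three-entry history and an assumed final value of -1:
history = [['x0', 'pi0', None], ['x1', 'pi1', None], ['x2', 'pi2', None]]

value = -1  # assumed result from the final position's point of view
for i in range(len(history)):
    history[i][2] = value
    value = -value

print([h[2] for h in history])  # [-1, 1, -1]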
def test_replay_position(self):
    sgf_positions = list(replay_sgf(NO_HANDICAP_SGF))
    initial = sgf_positions[0]
    self.assertEqual(initial.metadata.result, 'W+1.5')
    self.assertEqual(initial.metadata.board_size, 9)
    self.assertEqual(initial.position.komi, 6.5)
    final = sgf_positions[-1].position

    # sanity check to ensure we're working with the right position
    final_board = load_board('''
        .OXX.....
        O.OX.X...
        .OOX.....
        OOOOXXXXX
        XOXXOXOOO
        XOOXOO.O.
        XOXXXOOXO
        XXX.XOXXO
        X..XOO.O.
    ''')
    expected_final_position = go.Position(
        final_board,
        n=62,
        komi=6.5,
        caps=(3, 2),
        ko=None,
        recent=tuple(),
        to_play=go.BLACK)
    self.assertEqualPositions(expected_final_position, final)
    self.assertEqual(final.n, len(final.recent))

    replayed_positions = list(replay_position(final))
    for sgf_pos, replay_pos in zip(sgf_positions, replayed_positions):
        self.assertEqualPositions(sgf_pos.position, replay_pos.position)
def test_replay_position(self):
    sgf_positions = list(
        sgf_wrapper.replay_sgf(utils_test.BOARD_SIZE, NO_HANDICAP_SGF))
    initial = sgf_positions[0]
    self.assertEqual(initial.result, go.WHITE)

    final = sgf_positions[-1].position.play_move(
        sgf_positions[-1].next_move)

    # sanity check to ensure we're working with the right position
    final_board = utils_test.load_board('''
        .OXX.....
        O.OX.X...
        .OOX.....
        OOOOXXXXX
        XOXXOXOOO
        XOOXOO.O.
        XOXXXOOXO
        XXX.XOXXO
        X..XOO.O.
    ''')
    expected_final_position = go.Position(
        utils_test.BOARD_SIZE,
        final_board,
        n=62,
        komi=6.5,
        caps=(3, 2),
        ko=None,
        recent=tuple(),
        to_play=go.BLACK)
    self.assertEqualPositions(expected_final_position, final)
    self.assertEqual(final.n, len(final.recent))

    replayed_positions = list(
        go.replay_position(utils_test.BOARD_SIZE, final, 1))
    for sgf_pos, replay_pos in zip(sgf_positions, replayed_positions):
        self.assertEqualPositions(sgf_pos.position, replay_pos.position)
ALMOST_DONE_BOARD = test_utils.load_board('''
    .XO.XO.OO
    X.XXOOOO.
    XXXXXOOOO
    XXXXXOOOO
    .XXXXOOO.
    XXXXXOOOO
    .XXXXOOO.
    XXXXXOOOO
    XXXXOOOOO
''')

TEST_POSITION = go.Position(
    board=ALMOST_DONE_BOARD,
    n=105,
    komi=2.5,
    caps=(1, 4),
    ko=None,
    recent=(go.PlayerMove(go.BLACK, (0, 1)),
            go.PlayerMove(go.WHITE, (0, 8))),
    to_play=go.BLACK)

SEND_TWO_RETURN_ONE = go.Position(
    board=ALMOST_DONE_BOARD,
    n=75,
    komi=0.5,
    caps=(0, 0),
    ko=None,
    recent=(go.PlayerMove(go.BLACK, (0, 1)),
            go.PlayerMove(go.WHITE, (0, 8)),
            go.PlayerMove(go.BLACK, (1, 0))),
    to_play=go.WHITE)
import go
from test_utils import load_board, GoPositionTestCase

go.set_board_size(9)
EMPTY_ROW = '.' * go.N + '\n'
TEST_BOARD = load_board('''
    .X.....OO
    X........
    XXXXXXXXX
''' + EMPTY_ROW * 6)

TEST_POSITION = go.Position(
    board=TEST_BOARD,
    n=0,
    komi=6.5,
    caps=(1, 2),
    ko=None,
    recent=(go.PlayerMove(go.BLACK, (0, 1)),
            go.PlayerMove(go.WHITE, (0, 8)),
            go.PlayerMove(go.BLACK, (1, 0))),
    to_play=go.BLACK,
)

TEST_BOARD2 = load_board('''
    .XOXXOO..
    XO.OXOX..
    XXO..X...
''' + EMPTY_ROW * 6)

TEST_POSITION2 = go.Position(
    board=TEST_BOARD2,
    n=0,
        new_vars.append(
            tf.Variable(v, name=name.replace('PolicNetwork',
                                             'PlayerNetwork')))
    saver = tf.train.Saver(new_vars)
    sess.run(tf.global_variables_initializer())
    saver.save(sess, os.path.join(save_dir, str(t),
                                  'player' + str(t) + '.ckpt'))


g1 = tf.Graph()
with g1.as_default():
    train_net = PolicyNetwork(scope="PolicNetwork")
    train_net.initialize_variables('model/sl/epoch_48.ckpt')
    pos = go.Position()
    train_net.run(pos)

g2 = tf.Graph()
with g2.as_default():
    player_net = PolicyNetwork(scope="PlayerNetwork")
    player_net.initialize_variables('model/rl/2/player2.ckpt')
    pos = go.Position()
    player_net.run(pos)

save_trained_policy(1, 'model/rl')
print("===========load new model=================")

g2 = tf.Graph()
with g2.as_default():
    player_net = PolicyNetwork(scope="PlayerNetwork")
    .XXOOOOOO
    X.XOO...O
    .XXOO...O
    X.XOO...O
    .XXOO..OO
    X.XOOOOOO
    .XXOOOOOO
    X.XXXXXXX
    XXXXXXXXX
''')

SEND_TWO_RETURN_ONE = go.Position(
    board=ALMOST_DONE_BOARD,
    n=70,
    komi=2.5,
    caps=(1, 4),
    ko=None,
    recent=(go.PlayerMove(go.BLACK, (0, 1)),
            go.PlayerMove(go.WHITE, (0, 8))),
    to_play=go.BLACK)


class DummyNet:
    def __init__(self, fake_priors=None, fake_value=0):
        if fake_priors is None:
            fake_priors = np.ones((go.N ** 2) + 1) / (go.N ** 2 + 1)
        self.fake_priors = fake_priors
        self.fake_value = fake_value

    def run(self, position):
        return self.fake_priors, self.fake_value
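# DummyNet stands in for a real network in tests: run() returns a fixed prior
# over all go.N ** 2 + 1 moves (every board point plus pass) and a fixed
# value. A minimal usage sketch; the biased-prior construction below is
# illustrative, not taken from the original tests:
import numpy as np

net = DummyNet()
priors, value = net.run(go.Position())
assert np.isclose(priors.sum(), 1.0) and value == 0

# Concentrating the prior on one flat move index (17 is arbitrary) makes a
# search test deterministic.
biased = np.zeros(go.N ** 2 + 1)
biased[17] = 1.0
net = DummyNet(fake_priors=biased, fake_value=0.5)
priors, value = net.run(go.Position())
assert priors.argmax() == 17 and value == 0.5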
    value = value.detach().numpy()
    if use_random_symmetry:
        probabilities = symmetries.invert_symmetries_pi(
            syms_used, probabilities)
    return probabilities, value.flatten()


def bootstrap(working_dir):
    raise NotImplementedError


if __name__ == "__main__":
    input_ = np.zeros((1, 18, 19, 19)).astype(np.float32)
    input_[:, 16, :, :] = 1.
    # batch = torch.from_numpy(input_)
    fn = "/private/home/zhuoyuan/AlphaGo/ELF2_models/save-1661000.bin"
    # fn = "/Users/zhuoyuan/Exp/AlphaGo/ELF2_models/save-1661000.bin"
    model = DualNetwork(fn)
    # to run directly
    # res = model.model(batch)
    position = go.Position()
    prob, val = model.run(position)
    for i in range(19):
        print(prob[i * 19:(i + 1) * 19])
    print(prob[-1])
    print(val)
import go
import numpy as np

state = go.Position()


def random_action(state):
    legal_actions = state.all_legal_moves()
    sum_legal_actions = np.sum(legal_actions)
    possibilities = legal_actions / sum_legal_actions
    num = np.random.choice(len(legal_actions), p=possibilities)
    # the final flat index is the pass move
    # (for 9x9: indices 0..80 are board points, 81 is pass)
    if num == len(legal_actions) - 1:
        return None
    row = num // go.N
    column = num % go.N
    coord = (row, column)
    return coord


if __name__ == '__main__':
    while True:
        if state.is_game_over():
            print(state.result_string())
            break
        action = random_action(state)
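# random_action() maps a flat move index onto board coordinates with integer
# division and modulus against go.N, reserving the final index for pass. A
# quick standalone round-trip check of that mapping (N = 9 assumed here for
# illustration):
N = 9


def flat_to_coord(num):
    if num == N * N:  # final index is the pass move
        return None
    return (num // N, num % N)


def coord_to_flat(coord):
    if coord is None:
        return N * N
    return coord[0] * N + coord[1]


assert flat_to_coord(40) == (4, 4)  # center point of a 9x9 board
assert coord_to_flat((4, 4)) == 40
assert flat_to_coord(81) is None    # pass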
import time

import go
import strategies
import policy
import sgf_wrapper
import load_data_sets
import utils

net = policy.PolicyNetwork()
net.initialize_variables('/Users/brilee/dev/MuGo/saved_models/20170718')

now = time.time()
positions = [go.Position(to_play=go.WHITE) for i in range(1)]
# neural net 1 always plays "black", and variety is accomplished by
# letting white play first half the time.
strategies.simulate_many_games(net, net, positions)
print(time.time() - now)
now = time.time()


def get_winrate(final_positions):
    black_win = [utils.parse_game_result(pos.result()) == go.BLACK
                 for pos in final_positions]
    return sum(black_win) / len(black_win)


def extract_moves(final_positions):
    winning_moves = []
    losing_moves = []
    for final_position in final_positions:
        positions_w_context = utils.take_n(
            strategies.POLICY_CUTOFF_DEPTH,
def playMatches(EPISODES, logger, turns_until_tau0, goes_first=0):
    # every row of this test board is identical
    ROW = [0, 0, 0, 1, 0, 0, 0, -1, 1, 0, -1, 0, 0, 1, 0, -1, 0, 0, 0]
    BOARD = np.array([ROW] * 19)

    # flatten the 19x19 board and append two trailing zeros
    board = []
    for r in range(19):
        for c in range(19):
            board.append(BOARD[r, c])
    board.append(0)
    board.append(0)
    board = np.array(board)

    env = Game(board)
    player1 = Agent('best_player', env.state_size, env.action_size,
                    config.MCTS_SIMS, config.CPUCT)
    player2 = Agent('best_player', env.state_size, env.action_size,
                    config.MCTS_SIMS, config.CPUCT)

    scores = {player1.name: 0, "drawn": 0, player2.name: 0}
    sp_scores = {'sp': 0, "drawn": 0, 'nsp': 0}
    points = {player1.name: [], player2.name: []}

    pos = go.Position(board=BOARD)
    for e in range(EPISODES):
        print(str(e + 1) + ' ', end='')
        game_start = time.time()
        state = env.reset(board)
        done = 0
        turn = 0
        player1.mcts = None
        player2.mcts = None

        if goes_first == 0:
            player1Starts = random.randint(0, 1) * 2 - 1
        else:
            player1Starts = goes_first

        if player1Starts == 1:
            players = {
                1: {"agent": player1, "name": player1.name},
                -1: {"agent": player2, "name": player2.name}
            }
        else:
            players = {
                1: {"agent": player2, "name": player2.name},
                -1: {"agent": player1, "name": player1.name}
            }

        env.gameState.render()
        prev_turn = time.time()
        while done == 0:
            turn = turn + 1

            # Run the MCTS algo and return an action
            action, pos = players[state.playerTurn]['agent'].act(pos, state)
            print(pos.score())

            this_turn = time.time()
            print()
            print()
            print()
            print("************************* TURN: ", turn,
                  " *************************")
            print("This turn took ", (this_turn - prev_turn) / 60, " mins")
            print("It has been ", (this_turn - game_start) / 60,
                  " mins from the start of the game")
            prev_turn = time.time()

            # Do the action; the value of the newState is from the POV of the
            # new playerTurn, i.e. -1 if the previous player played a winning
            # move
            state, value, done, _ = env.step(action)
            value = pos.score()
            env.gameState.render()

            # Send state to GUI lib
            state.arr = ((19 * 19) * ctypes.c_int)()
            data = np.copy(state.board[0:361])
            for i, v in enumerate(data):
                state.arr[i] = v
            score = _estimator_so.estimate(19, 19, state.arr,
                                           state.playerTurn, 1000,
                                           ctypes.c_float(0.4))
            data[:] = state.arr
            current_player_score = state.playerTurn * score
            other_player_score = -state.playerTurn * score
            estimated_board_score = (current_player_score, other_player_score)
            print("Estimated board score: ", estimated_board_score)

            if done == 1:
                print("Done")
                if value > 0:
                    print('%s WINS!' % players[state.playerTurn]['name'])
                    # logger.info('Player %d Wins!', state.playerTurn)
                    # logger.info(str(value))
                    scores[players[state.playerTurn]['name']] += 1
                    if state.playerTurn == 1:
                        sp_scores['sp'] += 1
                    else:
                        sp_scores['nsp'] += 1
                elif value < 0:
                    print('%s WINS!' % players[-state.playerTurn]['name'])
                    # logger.info('Player %d Wins!', -state.playerTurn)
                    # logger.info(str(value))
                    scores[players[-state.playerTurn]['name']] += 1
                    if state.playerTurn == 1:
                        sp_scores['nsp'] += 1
                    else:
                        sp_scores['sp'] += 1
                else:
                    print('DRAW...')
                    scores['drawn'] += 1
                    sp_scores['drawn'] += 1

                pts = state.score
                points[players[state.playerTurn]['name']].append(pts[0])
                points[players[-state.playerTurn]['name']].append(pts[1])

    return (scores, points, sp_scores)
with gfile.FastGFile("../data/goai.pb", 'rb') as f:
    graph = tf.get_default_graph()
    graphdef = graph.as_graph_def()
    graphdef.ParseFromString(f.read())
    _ = tf.import_graph_def(graphdef)
    summary_write = tf.summary.FileWriter("./log", graph)

predict = graph.get_tensor_by_name('import/add_7:0')

evalset = []
testfiles = [f for f in listdir('../data/estimate/') if f[-4:] == 'json']
for f in testfiles:
    with open('../data/estimate/' + f) as json_data:
        record = json.load(json_data)
        s = 0
        parent = go.Position()
        while s < len(record) and s <= go.LN:
            position = go.Position()
            position.fromJSON(record[s])
            position.parent = parent
            parent = position
            if position.vertex != 0:
                evalset.append(position)
            s += 1

right = 0
for pos in evalset:
    if pos.vertex != 0:
        x_data = np.zeros(go.LN, dtype=np.float32).reshape(1, 1, go.N, go.N)
        p, q = go.toJI(pos.vertex)
        v = q * go.N + p - go.N - 1
def test_add_child(self):
    root = mcts.MCTSNode(go.Position())
    child = root.maybe_add_child(17)
    self.assertIn(17, root.children)
    self.assertEqual(root, child.parent)
    self.assertEqual(17, child.fmove)
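# A follow-on check suggested by the maybe_add_child name (assumed semantics,
# not part of the original test): asking for the same flat move twice should
# return the existing child rather than creating a new node.
def test_add_child_idempotent(self):
    root = mcts.MCTSNode(go.Position())
    first = root.maybe_add_child(17)
    second = root.maybe_add_child(17)
    self.assertIs(first, second)
    self.assertEqual(1, len(root.children))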
import time

import go
import strategies
import policy
import sgf_wrapper
import load_data_sets
import utils

net = policy.PolicyNetwork()
net.initialize_variables('/Users/brilee/dev/MuGo/saved_models/20170718')

now = time.time()
positions = [
    go.Position(to_play=go.BLACK if i % 2 == 0 else go.WHITE)
    for i in range(2)
]
# neural net 1 always plays "black", and variety is accomplished by
# letting white play first half the time.
strategies.simulate_many_games(net, net, positions)
print(time.time() - now)
now = time.time()


def get_winrate(final_positions):
    black_win = [
        utils.parse_game_result(pos.result()) == go.BLACK
        for pos in final_positions
    ]
    return sum(black_win) / len(black_win)


def extract_moves(final_positions):
async def main():
    # initialize game
    global GameInfo
    env = None
    gamepos = None
    mode = -1
    playerColor = ""
    oppcolor = ""
    playerName = "JoGo"
    playerTurn = 0
    action = 0
    sc = [0, 0, 0]
    ai = None

    while mode == -1:
        f = open("gui3.txt", "r")
        f1 = f.readlines()
        j = 0
        for i in f1:
            if j == 0:
                # mode == 0 --> AI vs AI, mode == 1 --> AI vs User
                mode = int(i)
            if j == 1:
                # playerColor: black (player1) or white (player2)
                playerColor = str(i)
            j = j + 1

    if mode == 0:
        print("AI vs AI")
        turn = 0
        game_start = time.time()
        prev_turn = time.time()
        GameInfo.State = States.INIT
        while True:
            print("Waiting for server")
            # print(type(GameInfo.RemainTime))
            # print(GameInfo.score)
            if GameInfo.State == States.INIT:
                print("Initialized")
                await InitState(GameInfo, playerName)
            elif GameInfo.State == States.READY:
                await ReadyState(GameInfo)
                playerColor = GameInfo.PlayerColor
                if GameInfo.endgame == False:
                    board = initialboard()
                    if playerColor == "B":
                        oppcolor = "W"
                        playerTurn = 1
                    else:
                        oppcolor = "B"
                        playerTurn = -1
                    if env is None:
                        env = Game(board, 1)
                        env.gameState.render()
                        ai = Agent('current_player', env.state_size,
                                   env.action_size, config.MCTS_SIMS,
                                   config.CPUCT)
                        gamepos = go.Position(board=initialdboard(board))
                else:
                    theend()
            elif GameInfo.State == States.IDLE:
                await IdleState(GameInfo)
                action = None
                baction = None
                if GameInfo.getOppMove(0) == -1:
                    if playerColor == "B":
                        action = 362
                    else:
                        action = 361
                elif GameInfo.getOppMove(0) == -2:
                    pass
                else:
                    baction = (int(GameInfo.getOppMove(0)),
                               int(GameInfo.getOppMove(1)))
                    action = (int(GameInfo.getOppMove(0)) * 19 +
                              int(GameInfo.getOppMove(1)))
                gamepos = gamepos.play_move(baction, env.gameState.playerTurn,
                                            mutate=True)
                env.step(action)
                turn = turn + 1
                this_turn = time.time()
                print()
                print()
                print()
                print("************************* TURN: ", turn,
                      " *************************")
                print("This turn took ", (this_turn - prev_turn) / 60,
                      " mins")
                print("It has been ", (this_turn - game_start) / 60,
                      " mins from the start of the game")
                env.gameState.render()
                env.gameState.renderThink()
            elif GameInfo.State == States.THINK:
                action, gamepos = ai.act(gamepos, env.gameState, turn)
                print(gamepos.score())
                typ = 1 if action in (361, 362) else 0
                y = action % 19
                x = math.floor(action / 19)
                env.step(action)
                await ThinkState(GameInfo, x, y, typ)
                turn = turn + 1
                this_turn = time.time()
                print()
                print()
                print()
                print("************************* TURN: ", turn,
                      " *************************")
                print("This turn took ", (this_turn - prev_turn) / 60,
                      " mins")
                print("It has been ", (this_turn - game_start) / 60,
                      " mins from the start of the game")
                env.gameState.render()
                env.gameState.renderWait()
def clear(self):
    self.position = go.Position()
def clear(self):
    self.position = go.Position(komi=self.komi)
def testAgainstRandom(model, matches):
    # Untrained models do not play at random: they have random weight
    # initializations and then always play according to those weights.
    # This function takes a trained model and plays `matches` games against
    # a player who makes a random move every time.
    veteran = model
    veteranWins = 0
    beginnerWins = 0
    for i in range(matches):
        # alternate which color the trained model plays; the other side
        # falls back to random play below
        if i % 2 == 0:
            black, white = veteran, None
        else:
            black, white = None, veteran
        position = go.Position()
        while not position.is_game_over():
            if position.n >= 100:
                position = position.pass_move()
            elif position.to_play == 1:
                if black == veteran:
                    boards, playerCaps, opponentCaps = gamesToData(
                        [[position, 1]])
                    actions = black.callPol(boards, playerCaps,
                                            opponentCaps)[0]
                    pdist = tf.nn.softmax(tf.cast(actions, dtype=tf.float64))
                    legalMoves = position.all_legal_moves()
                    move = np.random.choice(np.arange(0, len(pdist)), p=pdist)
                    if legalMoves[move] == 0:
                        actions = actions * legalMoves
                        move = tf.math.argmax(actions).numpy()
                    position = position.play_move(coords.from_flat(move))
                else:
                    position = choose_and_play_move(position)
            else:
                if white == veteran:
                    boards, playerCaps, opponentCaps = gamesToData(
                        [[position, 1]])
                    actions = white.callPol(boards, playerCaps,
                                            opponentCaps)[0]
                    pdist = tf.nn.softmax(tf.cast(actions, dtype=tf.float64))
                    legalMoves = position.all_legal_moves()
                    move = np.random.choice(np.arange(0, len(pdist)), p=pdist)
                    if legalMoves[move] == 0:
                        actions = actions * legalMoves
                        move = tf.math.argmax(actions).numpy()
                    position = position.play_move(coords.from_flat(move))
                else:
                    position = choose_and_play_move(position)
        if black == veteran:
            if position.result() == 1:
                veteranWins += 1
            elif position.result() == -1:
                beginnerWins += 1
            else:
                print("No one wins!!")
        else:
            if position.result() == 1:
                beginnerWins += 1
            elif position.result() == -1:
                veteranWins += 1
            else:
                print("No one wins!!")
    print("The model wins " + str(veteranWins))
    print("The random wins " + str(beginnerWins))
    return veteranWins - beginnerWins