def generate(config, model_file, output_file):
    """Play self-play games forever and append one record per finished game.

    The model loaded from ``model_file`` drives ``config.batch_size`` games in
    lockstep: every round it predicts priors for all boards, each game samples
    a move from ``fix_probabilities(board, priors)`` and applies it. When
    ``winner`` reports a finished game, ``game_result`` produces a
    ``(board, won, visits)`` triple that is written with ``write_record`` and
    flushed, progress is printed to stderr, and that slot restarts on a fresh
    board.

    The loop has no exit condition; the function runs until interrupted.

    Args:
        config: Settings object. ``size`` and ``batch_size`` are read here and
            the object is passed on to ``game_result``.
        model_file: Passed to ``load_model``.
        output_file: Path of the binary record file, opened in append mode.
    """
    model = load_model(model_file)
    with open(output_file, 'ab') as fout:
        # Drop any partially written trailing record (e.g. from an interrupted
        # run). In append mode every write lands at the end of the file no
        # matter where the stream was positioned, so seeking alone would leave
        # the partial record in place and misalign all later records; the
        # file itself has to be cut back to a whole number of records.
        file_pos = fout.tell()
        fout.seek(file_pos - file_pos % record_size(config.size))
        fout.truncate()

        samples = 0
        start_time = time.time()
        game_boards = numpy.array(
            [new_board(config.size) for _ in range(config.batch_size)])
        game_moves = [[] for _ in range(config.batch_size)]
        while True:
            _values, priors = model.predict(game_boards)
            priors = numpy.reshape(priors, (-1, config.size, config.size))
            for i in range(config.batch_size):
                probs = fix_probabilities(game_boards[i], priors[i])
                move = sample_move(probs)
                game_moves[i].append(move)
                game_boards[i] = make_move(game_boards[i], move)
                if not winner(game_boards[i]):
                    continue

                samples += 1
                board, won, visits = game_result(config, model, game_moves[i])
                write_record(fout, board, won, visits)
                # Flush per record so an interruption loses at most one.
                fout.flush()
                print_board(board, file=sys.stderr)
                print('Games: %d, Time per game: %.2fs'
                      % (samples, (time.time() - start_time) / samples),
                      file=sys.stderr)
                # Restart this slot with an empty game.
                game_boards[i] = new_board(config.size)
                game_moves[i] = []
def play():
    """Run interactive console games until the players decline another round.

    Each round asks which mark moves first, then alternates turns: the board
    is printed, the current player types a two-character move (row digit then
    column digit, both 1-based), and the move is applied with
    ``game.make_move``. A round ends when ``game.any_win`` reports a win for
    the player who just moved, or ``game.board_full`` reports a draw.

    Rounds are repeated in a loop rather than by recursion, so the call stack
    does not grow with each replay and the return value is the same no matter
    how many rounds were played.

    Returns:
        0, once the players answer anything other than ``'y'`` to the
        play-again prompt.
    """
    while True:
        cur_player = input('Would you like to be X or O: ').upper()
        main_board = game.new_board()
        while True:
            game.print_board(main_board)
            move_str = input(
                'Where would you like to place an %s? (e.g. 12 for 1st row, 2nd column) '
                % cur_player)
            try:
                move_row = int(move_str[0]) - 1
                move_col = int(move_str[1]) - 1
            except (ValueError, IndexError):
                # Too short or not digits: ask again instead of crashing.
                # Range and occupancy checks are left to game.make_move.
                print('Please enter two digits: the row followed by the column.')
                continue
            main_board = game.make_move(main_board, move_row, move_col,
                                        cur_player)
            if game.any_win(main_board, cur_player):
                game.print_board(main_board)
                print(
                    'Congratulations Player %s! Better luck next time player %s'
                    % (cur_player, game.other_player(cur_player)))
                break
            elif game.board_full(main_board):
                game.print_board(main_board)
                print('No more moves possible. The game ended in a draw.')
                break
            cur_player = game.other_player(cur_player)

        if input('Would you like to play again? [y/n]') != 'y':
            print('Thanks for playing.')
            return 0
def compare(config, model_file1, model_file2):
    """Play two models against each other indefinitely, reporting to stderr.

    Both models get their own ``TreeSearchPredictor`` on a fresh board for
    every game, and every chosen move is applied to both predictors so they
    stay in sync. Move 0 of each game is always searched by ``predictors[1]``;
    after that the searching predictor is selected by the parity of
    ``games ^ move_index``. The first three moves are drawn with
    ``sample_move``, later ones with ``best_move``.

    After every move the board, the searching side's win probability and
    (from the second game on) the running win ratio are printed to stderr.
    The outer loop never terminates; nothing is returned.

    Args:
        config: Settings object; ``search_config``, ``size`` and
            ``iterations`` are read.
        model_file1: Passed to ``load_model`` for the model at index 0.
        model_file2: Passed to ``load_model`` for the model at index 1.
    """
    models = [load_model(path) for path in (model_file1, model_file2)]
    side_names = ['First', 'Second']
    games = 0
    first_player_wins = 0
    win_ratio = uncertainty = None

    while True:
        predictors = []
        for model in models:
            predictors.append(TreeSearchPredictor(
                config.search_config, model, new_board(config.size), True))

        move_index = 0
        while not winner(predictors[0].board):
            # 0 when game count and move index share parity, 1 otherwise.
            parity = (games ^ move_index) & 1
            searcher = predictors[1] if move_index == 0 else predictors[parity]
            searcher.run(config.iterations)
            value, probabilities = searcher.predict()

            choose = sample_move if move_index < 3 else best_move
            move = choose(probabilities)
            for each in predictors:
                each.make_move(move)

            if parity == 0:
                print_board(flip(predictors[0].board), move, file=sys.stderr)
            else:
                print_board(predictors[0].board, flip_move(move),
                            file=sys.stderr)
            print('%s model win probability: %.2f'
                  % (side_names[parity], (value + 1) / 2),
                  file=sys.stderr)
            if games > 0:
                print('Win ratio %.2f ± %.2f (%d games)'
                      % (win_ratio, uncertainty, games),
                      file=sys.stderr)
            move_index += 1

        games += 1
        if (games ^ move_index) & 1 == 0:
            first_player_wins += 1
        win_ratio = float(first_player_wins) / games
        # NOTE(review): the usual standard error of a proportion is
        # sqrt(p * (1 - p) / n); the extra win_ratio factor is kept exactly
        # as in the original -- confirm whether it is intended.
        uncertainty = win_ratio * math.sqrt(
            win_ratio * (1 - win_ratio) / games)
def game_result(config, model, moves):
    """Turn a finished game's move list into one ``(board, won, visits)`` sample.

    A cut point is drawn uniformly from ``0 .. len(moves) - 1`` and the moves
    before it are replayed on a fresh board. A ``TreeSearchPredictor`` is then
    run from that position for ``config.iterations``.

    Args:
        config: Settings object; ``size``, ``search_config`` and
            ``iterations`` are read.
        model: Passed to ``TreeSearchPredictor``.
        moves: The game's moves in order. Must be non-empty, since
            ``random.randint`` raises ``ValueError`` on an empty range.

    Returns:
        A tuple of the board after the replayed prefix, a bool that is True
        when the cut point has the same parity as the index of the last move,
        and ``predictor.visits()``.
    """
    final_index = len(moves) - 1
    cut = random.randint(0, final_index)

    position = new_board(config.size)
    for played in moves[:cut]:
        position = make_move(position, played)

    # The fourth argument is True only when no move has been replayed.
    search = TreeSearchPredictor(
        config.search_config, model, position, cut == 0)
    search.run(config.iterations)

    same_parity = final_index % 2 == cut % 2
    return position, same_parity, search.visits()
def read_record(f, size):
    """Read one record from ``f`` and decode it into board, outcome and visits.

    ``record_size(size)`` bytes are read. The record holds ``size * size``
    cells, each unpacked with ``short``, in row-major order (``x`` outer,
    ``y`` inner), followed by one value unpacked with ``boolean``. Per cell:

    * bits 0-5 go to ``visits[x, y]``
    * bit 6 goes to ``board[0, x, y]``
    * bit 7 goes to ``board[1, x, y]``

    Args:
        f: Binary file-like object positioned at the start of a record.
        size: Board edge length.

    Returns:
        ``(board, won, visits)`` where ``board`` starts from
        ``new_board(size)`` and ``visits`` is a ``(size, size)`` numpy array.
    """
    record = f.read(record_size(size))
    board = new_board(size)
    visits = numpy.zeros((size, size))

    cell_width = short.size
    visit_mask = (1 << 6) - 1
    cell_count = size * size
    for index in range(cell_count):
        x, y = divmod(index, size)
        cell = short.unpack_from(record, index * cell_width)[0]
        visits[x, y] = cell & visit_mask
        board[0, x, y] = (cell >> 6) & 1
        board[1, x, y] = (cell >> 7) & 1

    # The outcome flag sits immediately after the last cell.
    won = boolean.unpack_from(record, cell_count * cell_width)[0]
    return board, won, visits
def simulate(self, n):
    """Generate one ``(won, lost, moves)`` tuple per simulated game.

    Each game starts from ``game.new_board()``. On every step the board is
    replaced by ``self.actor(board)``, the step counter is incremented and
    ``game.add_cell(board)`` is called. The game ends as soon as
    ``game.is_board_won`` (checked first) or ``game.is_board_lost`` is true.

    Args:
        n: Number of games to play.

    Yields:
        ``(1, 0, moves)`` for a won game and ``(0, 1, moves)`` for a lost
        one, where ``moves`` is the number of steps taken. When ``n`` is 0 a
        single ``(0, 0, 0)`` is yielded instead.
    """
    if n == 0:
        yield 0, 0, 0

    for _ in range(n):
        board = game.new_board()
        depth = 0
        outcome = None
        while outcome is None:
            board = self.actor(board)
            depth += 1
            game.add_cell(board)
            if game.is_board_won(board):
                outcome = (1, 0, depth)
            elif game.is_board_lost(board):
                outcome = (0, 1, depth)
        yield outcome
def clear_board(self):
    """Reset the stored position and move history.

    ``self.board`` becomes ``new_board(self.config.size)`` and
    ``self.history`` becomes an empty list.

    Returns:
        An empty string.
    """
    board_size = self.config.size
    self.board = new_board(board_size)
    self.history = list()
    return ''
def compare(config, model_file1, model_file2, temp, num_games):
    """Play ``num_games`` games between two models and return the win ratios.

    Both models get their own ``TreeSearchPredictor`` on a fresh board for
    every game, and every chosen move is applied to both predictors so they
    stay in sync. Move 0 of each game is always searched by ``predictors[1]``;
    after that the searching predictor is selected by the parity of
    ``games ^ move_index``. On moves whose index has the same parity as the
    game count, the predicted probabilities are replaced by
    ``temperature(probabilities, temp)``. Every move is drawn with
    ``sample_move``.

    After every move the board, the searching side's win probability and
    (from the second game on) the running win ratio are printed to stderr.

    Args:
        config: Settings object; ``search_config``, ``size`` and
            ``iterations`` are read.
        model_file1: Passed to ``load_model`` for the model at index 0.
        model_file2: Passed to ``load_model`` for the model at index 1.
        temp: Second argument to ``temperature``.
        num_games: Number of games to play.

    Returns:
        List with the running win ratio after each game (empty when
        ``num_games`` is 0).
    """
    models = [load_model(path) for path in (model_file1, model_file2)]
    side_names = ['First', 'Second']
    games = 0
    first_player_wins = 0
    win_ratio = uncertainty = None
    ratios = []

    # Disabled experiments from earlier revisions, summarised here instead of
    # being kept as commented-out code:
    #   * building predictors[0] / predictors[1] separately after setting
    #     config.search_config.uct_factor to 5.0 / 100.0;
    #   * the same with config.search_config.virtual_loss set to
    #     3.0 / 100.0 and to 3.0 / 0;
    #   * replacing the probabilities on same-parity moves with a uniform
    #     11x11 distribution (0.00826446 per cell);
    #   * choosing moves from index 3 on with best_move instead of
    #     sample_move;
    #   * looping forever (``while True``) instead of for num_games games.
    for _ in range(num_games):
        predictors = []
        for model in models:
            predictors.append(TreeSearchPredictor(
                config.search_config, model, new_board(config.size), True))

        move_index = 0
        while not winner(predictors[0].board):
            # 0 when game count and move index share parity, 1 otherwise.
            parity = (games ^ move_index) & 1
            searcher = predictors[1] if move_index == 0 else predictors[parity]
            searcher.run(config.iterations)
            value, probabilities = searcher.predict()

            # Temperature experiment: only same-parity moves use the
            # temperature-adjusted distribution.
            tempered = temperature(probabilities, temp)
            if parity == 0:
                probabilities = tempered

            move = sample_move(probabilities)
            for each in predictors:
                each.make_move(move)

            if parity == 0:
                print_board(flip(predictors[0].board), move, file=sys.stderr)
            else:
                print_board(predictors[0].board, flip_move(move),
                            file=sys.stderr)
            print('%s model win probability: %.2f'
                  % (side_names[parity], (value + 1) / 2),
                  file=sys.stderr)
            if games > 0:
                print('Win ratio %.2f ± %.2f (%d games)'
                      % (win_ratio, uncertainty, games),
                      file=sys.stderr)
            move_index += 1

        games += 1
        if (games ^ move_index) & 1 == 0:
            first_player_wins += 1
        win_ratio = float(first_player_wins) / games
        # NOTE(review): the usual standard error of a proportion is
        # sqrt(p * (1 - p) / n); the extra win_ratio factor is kept exactly
        # as in the original -- confirm whether it is intended.
        uncertainty = win_ratio * math.sqrt(
            win_ratio * (1 - win_ratio) / games)
        ratios.append(win_ratio)

    return ratios