def genmove(self, player):
    """Search for a move for ``player``, play it, and return it as text.

    A fresh ``TreeSearchPredictor`` is built for every call, run for
    ``self.config.iterations`` iterations, and the move chosen by
    ``best_move`` is applied to ``self.board``.  The position held before
    the move is pushed onto ``self.history``.

    The search for black runs on ``self.board``; the search for white runs
    on ``flip(self.board)`` and the chosen move is passed through
    ``flip_move`` before it is reported.

    Args:
        player: ``'black'`` or ``'white'``.

    Returns:
        The chosen move formatted by ``write_move``.

    Raises:
        GtpException: if either side has already won, or if ``player`` is
            neither ``'black'`` nor ``'white'``.
    """
    if winner(self.board) or winner(flip(self.board)):
        raise GtpException('Game is over')
    if player not in ('black', 'white'):
        # Reject bad input up front so the history never needs rolling back.
        raise GtpException('Player is invalid')

    # This flag has to be computed BEFORE the current position is pushed:
    # once the append has happened the list is never empty, so evaluating
    # `not self.history` afterwards would always yield False.
    # NOTE(review): presumably this is the predictor's "first move" flag
    # (compare() passes True for a fresh board) -- confirm against
    # TreeSearchPredictor.
    is_first_move = not self.history
    self.history.append(self.board)

    is_black = player == 'black'
    search_board = self.board if is_black else flip(self.board)

    # TODO: reuse previous calculations from the MCTS
    predictor = TreeSearchPredictor(
        self.config.search_config, self.model, search_board, is_first_move)
    predictor.run(self.config.iterations)
    value, probabilities = predictor.predict()
    move = best_move(probabilities)

    if is_black:
        self.board = flip(make_move(self.board, move))
    else:
        self.board = make_move(flip(self.board), move)
        # The search ran on the flipped board, so map the move back before
        # reporting it.
        move = flip_move(move)

    print('Estimated value: %.2f' % value, file=sys.stderr)
    return write_move(move)
def play(self, player, move):
    """Apply an externally supplied move for ``player`` to the board.

    The textual move is parsed with ``read_move``.  A copy of the current
    board is pushed onto ``self.history`` before the board is replaced; if
    ``player`` turns out to be invalid that history entry is removed again.

    Args:
        player: ``'black'`` or ``'white'``.
        move: the move in the textual form understood by ``read_move``.

    Returns:
        An empty string.

    Raises:
        GtpException: if ``player`` is neither ``'black'`` nor ``'white'``.
    """
    parsed = read_move(move)
    self.history.append(numpy.copy(self.board))

    if player == 'black':
        updated = flip(make_move(self.board, parsed))
    elif player == 'white':
        # White's move is applied on the flipped board, so the move itself
        # is flipped as well.
        updated = make_move(flip(self.board), flip_move(parsed))
    else:
        # Undo the snapshot taken above; nothing was played.
        self.history.pop()
        raise GtpException('Player is invalid')

    self.board = updated
    return ''
def winner(self):
    """Report the winner of the current position as a string.

    Returns:
        ``'white'`` if the module-level ``winner`` check is truthy for
        ``self.board``, ``'black'`` if it is truthy for the flipped board,
        and ``'none'`` otherwise.
    """
    if winner(self.board):
        return 'white'
    if winner(flip(self.board)):
        return 'black'
    return 'none'
def compare(config, model_file1, model_file2):
    """Pit two saved models against each other indefinitely.

    Both models are loaded once.  Every game starts from a fresh board with
    one ``TreeSearchPredictor`` per model; each chosen move is applied to
    both predictors.  The first three moves of a game are drawn with
    ``sample_move``; later moves use ``best_move``.  The board, the moving
    model's win probability and the running win ratio are printed to
    stderr after every move.

    The outer loop has no exit condition, so this function never returns.

    NOTE(review): the printed uncertainty is
    ``win_ratio * sqrt(win_ratio * (1 - win_ratio) / games)``; the usual
    standard error of a proportion has no leading ``win_ratio`` factor --
    confirm this is intended.
    NOTE(review): a second ``compare`` with a different signature is defined
    after this one; if both live in the same scope the later one wins.

    Args:
        config: object providing ``search_config``, ``size`` and
            ``iterations``.
        model_file1: path handed to ``load_model`` for the first model.
        model_file2: path handed to ``load_model`` for the second model.
    """
    models = [load_model(path) for path in (model_file1, model_file2)]
    labels = ['First', 'Second']
    games = 0
    first_player_wins = 0
    win_ratio = uncertainty = None

    while True:
        predictors = [
            TreeSearchPredictor(
                config.search_config, model, new_board(config.size), True)
            for model in models
        ]
        move_index = 0

        while not winner(predictors[0].board):
            # 0 when the game counter and the move counter share parity.
            parity = (games ^ move_index) & 1

            # The opening move is always searched by the second predictor.
            active = predictors[1] if move_index == 0 else predictors[parity]
            active.run(config.iterations)
            value, probabilities = active.predict()

            pick = sample_move if move_index < 3 else best_move
            move = pick(probabilities)

            # Keep both search trees in sync with the game.
            for each in predictors:
                each.make_move(move)

            if parity == 0:
                print_board(flip(predictors[0].board), move, file=sys.stderr)
            else:
                print_board(
                    predictors[0].board, flip_move(move), file=sys.stderr)

            print(
                '%s model win probability: %.2f'
                % (labels[parity], (value + 1) / 2),
                file=sys.stderr)
            if games > 0:
                print(
                    'Win ratio %.2f ± %.2f (%d games)'
                    % (win_ratio, uncertainty, games),
                    file=sys.stderr)

            move_index += 1

        games += 1
        if (games & 1) == (move_index & 1):
            first_player_wins += 1
        win_ratio = float(first_player_wins) / games
        uncertainty = win_ratio * math.sqrt(
            win_ratio * (1 - win_ratio) / games)
def compare(config, model_file1, model_file2, temp, num_games):
    """Play ``num_games`` games between two models and track the win ratio.

    Both models are loaded once.  Every game starts from a fresh board with
    one ``TreeSearchPredictor`` per model; each chosen move is applied to
    both predictors.  Every move is drawn with ``sample_move``.  On moves
    where the game counter and the move counter share parity, the
    distribution sampled from is the one returned by
    ``temperature(probabilities, temp)`` instead of the raw search output.
    The board, the moving model's win probability and the running win ratio
    are printed to stderr after every move.

    NOTE(review): the printed uncertainty is
    ``win_ratio * sqrt(win_ratio * (1 - win_ratio) / games)``; the usual
    standard error of a proportion has no leading ``win_ratio`` factor --
    confirm this is intended.  It only affects the stderr log, not the
    returned ratios.

    Args:
        config: object providing ``search_config``, ``size`` and
            ``iterations``.
        model_file1: path handed to ``load_model`` for the first model.
        model_file2: path handed to ``load_model`` for the second model.
        temp: second argument forwarded to ``temperature``.
        num_games: number of games to play.

    Returns:
        A list with the cumulative first-player win ratio after each game
        (empty when ``num_games`` is 0).
    """
    models = [load_model(model_file1), load_model(model_file2)]
    games = 0
    first_player_wins = 0
    win_ratio, uncertainty = None, None
    ratios = []

    for _ in range(num_games):
        move_index = 0
        # Earlier experiments (previously kept here as commented-out code)
        # built the two predictors separately after changing
        # config.search_config between them: uct_factor 5.0 vs 100.0, and
        # virtual_loss 3.0 vs 100.0 or 0.  Another experiment substituted a
        # uniform 11x11 distribution for the search probabilities on the
        # same moves that now receive the temperature-adjusted distribution.
        predictors = [
            TreeSearchPredictor(
                config.search_config, model, new_board(config.size), True)
            for model in models
        ]

        while not winner(predictors[0].board):
            # The opening move is always searched by the second predictor.
            if move_index == 0:
                predictor = predictors[1]
            else:
                predictor = predictors[(games ^ move_index) & 1]
            predictor.run(config.iterations)
            value, probabilities = predictor.predict()

            # Called on every move, exactly as before, in case
            # temperature() touches its input; only the result is
            # conditional.
            tprobs = temperature(probabilities, temp)
            if (games & 1) == (move_index & 1):
                probabilities = tprobs

            # Both branches of the former `move_index < 3` test sampled, so
            # the test was dead; every move is sampled.
            move = sample_move(probabilities)

            # Keep both search trees in sync with the game.
            for each in predictors:
                each.make_move(move)

            if (games & 1) == (move_index & 1):
                print_board(flip(predictors[0].board), move, file=sys.stderr)
            else:
                print_board(
                    predictors[0].board, flip_move(move), file=sys.stderr)

            print(
                '%s model win probability: %.2f'
                % (['First', 'Second'][(games ^ move_index) & 1],
                   (value + 1) / 2),
                file=sys.stderr)
            if games > 0:
                print(
                    'Win ratio %.2f ± %.2f (%d games)'
                    % (win_ratio, uncertainty, games),
                    file=sys.stderr)

            move_index += 1

        games += 1
        if (games & 1) == (move_index & 1):
            first_player_wins += 1
        win_ratio = float(first_player_wins) / games
        uncertainty = win_ratio * math.sqrt(
            win_ratio * (1 - win_ratio) / games)
        ratios.append(win_ratio)

    return ratios