Ejemplo n.º 1
0
 def genmove(self, player):
     """Search for a move for ``player``, play it, and return it as text.

     A ``TreeSearchPredictor`` is run for ``self.config.iterations``
     iterations on the current position (flipped first when ``player`` is
     ``'white'``), the move selected by ``best_move`` is applied to
     ``self.board``, and the previous board is pushed onto
     ``self.history``. The estimated value is printed to stderr.

     Args:
         player: ``'black'`` or ``'white'``.

     Returns:
         The chosen move formatted by ``write_move``.

     Raises:
         GtpException: if ``winner`` reports a win for the board or its
             flip ('Game is over'), or if ``player`` is neither
             ``'black'`` nor ``'white'`` ('Player is invalid').
     """
     if winner(self.board) or winner(flip(self.board)):
         raise GtpException('Game is over')
     if player not in ('black', 'white'):
         raise GtpException('Player is invalid')
     playing_black = player == 'black'

     # This flag must be read BEFORE the current position is recorded in
     # the history: once the position has been appended the history is
     # never empty and the flag would always be False.
     # NOTE(review): presumably this is TreeSearchPredictor's
     # "is first move" argument -- confirm against its signature.
     is_first_move = not self.history

     # TODO: reuse previous calculations from the MCTS
     search_board = self.board if playing_black else flip(self.board)
     predictor = TreeSearchPredictor(self.config.search_config, self.model,
                                     search_board, is_first_move)
     predictor.run(self.config.iterations)
     value, probabilities = predictor.predict()
     move = best_move(probabilities)

     if playing_black:
         next_board = flip(make_move(self.board, move))
     else:
         next_board = make_move(flip(self.board), move)
         # The search ran on the flipped board, so map the move back
         # before reporting it.
         move = flip_move(move)

     # Commit only after the search and the move succeeded, so a failure
     # above leaves both the history and the board untouched.
     # NOTE(review): the board object itself (not a copy) is stored; that
     # is only safe if make_move/flip return new boards -- confirm.
     self.history.append(self.board)
     self.board = next_board

     print('Estimated value: %.2f' % value, file=sys.stderr)
     return write_move(move)
Ejemplo n.º 2
0
 def play(self, player, move):
     """Apply an externally supplied move for ``player`` to the board.

     The textual ``move`` is parsed with ``read_move``. On success a copy
     of the previous board is pushed onto ``self.history`` and
     ``self.board`` is replaced by the resulting position.

     Args:
         player: ``'black'`` or ``'white'``.
         move: the move in the textual form accepted by ``read_move``.

     Returns:
         An empty string.

     Raises:
         GtpException: if ``player`` is neither ``'black'`` nor
             ``'white'`` ('Player is invalid').
     """
     move = read_move(move)
     if player not in ('black', 'white'):
         raise GtpException('Player is invalid')

     # Snapshot before make_move runs, in case it modifies the board.
     snapshot = numpy.copy(self.board)
     if player == 'black':
         next_board = flip(make_move(self.board, move))
     else:
         next_board = make_move(flip(self.board), flip_move(move))

     # Record the history entry only once the move has been applied, so an
     # exception above cannot leave a stale entry behind.
     self.history.append(snapshot)
     self.board = next_board
     return ''
Ejemplo n.º 3
0
 def winner(self):
     """Report the game result as ``'white'``, ``'black'`` or ``'none'``.

     The module-level ``winner`` function is consulted first for
     ``self.board`` and then, only if that is falsy, for the flipped board.
     """
     if winner(self.board):
         return 'white'
     if winner(flip(self.board)):
         return 'black'
     return 'none'
Ejemplo n.º 4
0
def compare(config, model_file1, model_file2):
    """Play two models against each other indefinitely, logging to stderr.

    Each game starts from ``new_board(config.size)`` with one
    ``TreeSearchPredictor`` per model. The first three moves of a game are
    drawn with ``sample_move``; later moves use ``best_move``. After every
    move the board and the mover's estimated win probability are printed,
    followed by the running win ratio once at least one game has finished.

    The outer loop has no exit condition, so this function never returns
    normally.

    Args:
        config: object providing ``search_config``, ``size`` and
            ``iterations``.
        model_file1: path passed to ``load_model`` for the first model.
        model_file2: path passed to ``load_model`` for the second model.
    """
    models = [load_model(model_file1), load_model(model_file2)]
    games = 0
    first_player_wins = 0
    win_ratio, uncertainty = None, None
    while True:
        move_index = 0
        predictors = [
            TreeSearchPredictor(config.search_config, model,
                                new_board(config.size), True)
            for model in models
        ]
        while not winner(predictors[0].board):
            # The opening move is always chosen by the second predictor;
            # afterwards the mover alternates with the parity of
            # games ^ move_index.
            # NOTE(review): the 'First'/'Second' label printed below uses
            # that parity even for move 0 -- confirm this is intended.
            if move_index == 0:
                mover = predictors[1]
            else:
                mover = predictors[(games ^ move_index) & 1]
            mover.run(config.iterations)
            value, probabilities = mover.predict()
            if move_index < 3:
                move = sample_move(probabilities)
            else:
                move = best_move(probabilities)
            # Every predictor must see the move so their boards stay in
            # sync.
            for each_predictor in predictors:
                each_predictor.make_move(move)
            if games & 1 == move_index & 1:
                print_board(flip(predictors[0].board), move, file=sys.stderr)
            else:
                print_board(predictors[0].board, flip_move(move),
                            file=sys.stderr)
            print('%s model win probability: %.2f' %
                  (['First', 'Second'][(games ^ move_index) & 1],
                   (value + 1) / 2),
                  file=sys.stderr)
            if games > 0:
                print('Win ratio %.2f ± %.2f (%d games)' %
                      (win_ratio, uncertainty, games),
                      file=sys.stderr)
            move_index += 1
        games += 1
        # NOTE(review): presumably this parity test identifies a win for
        # the first model -- confirm against the move ordering above.
        if games & 1 == move_index & 1:
            first_player_wins += 1
        win_ratio = float(first_player_wins) / games
        # Standard error of a sample proportion: sqrt(p * (1 - p) / n).
        uncertainty = math.sqrt(win_ratio * (1 - win_ratio) / games)
Ejemplo n.º 5
0
def compare(config, model_file1, model_file2, temp, num_games):
    """Play ``num_games`` games between two models and track the win ratio.

    Each game starts from ``new_board(config.size)`` with one
    ``TreeSearchPredictor`` per model. Every move is drawn with
    ``sample_move``; on moves where ``games`` and ``move_index`` have the
    same parity the probabilities are first passed through
    ``temperature(probabilities, temp)``. Boards, the mover's estimated
    win probability and the running win ratio are printed to stderr.

    Args:
        config: object providing ``search_config``, ``size`` and
            ``iterations``.
        model_file1: path passed to ``load_model`` for the first model.
        model_file2: path passed to ``load_model`` for the second model.
        temp: second argument forwarded to ``temperature``.
        num_games: number of games to play.

    Returns:
        A list holding the running win ratio after each finished game
        (empty when ``num_games`` is 0).
    """
    models = [load_model(model_file1), load_model(model_file2)]
    games = 0
    first_player_wins = 0
    win_ratio, uncertainty = None, None

    ratios = []
    for _ in range(num_games):
        move_index = 0
        predictors = [
            TreeSearchPredictor(config.search_config, model,
                                new_board(config.size), True)
            for model in models
        ]

        # Disabled experiments: to compare search settings rather than
        # models, rebuild each predictor after changing the shared search
        # config, e.g.
        #   config.search_config.uct_factor = 5.0
        #   predictors[0] = TreeSearchPredictor(
        #       config.search_config, models[0],
        #       new_board(config.size), True)
        #   config.search_config.uct_factor = 100.0
        #   predictors[1] = TreeSearchPredictor(
        #       config.search_config, models[1],
        #       new_board(config.size), True)
        # The same recipe was used with virtual_loss = 3.0 vs 100.0 and
        # virtual_loss = 3.0 vs 0.

        while not winner(predictors[0].board):
            # The opening move is always chosen by the second predictor;
            # afterwards the mover alternates with the parity of
            # games ^ move_index.
            if move_index == 0:
                mover = predictors[1]
            else:
                mover = predictors[(games ^ move_index) & 1]
            mover.run(config.iterations)
            value, probabilities = mover.predict()

            # Temperature experiment: reshape the distribution only on
            # moves where games and move_index share parity. (A disabled
            # variant substituted a uniform 11x11 distribution here.)
            if games & 1 == move_index & 1:
                probabilities = temperature(probabilities, temp)

            # Every move is sampled; the best_move choice for
            # move_index >= 3 is disabled in this experiment.
            move = sample_move(probabilities)

            # Every predictor must see the move so their boards stay in
            # sync.
            for each_predictor in predictors:
                each_predictor.make_move(move)
            if games & 1 == move_index & 1:
                print_board(flip(predictors[0].board), move, file=sys.stderr)
            else:
                print_board(predictors[0].board,
                            flip_move(move),
                            file=sys.stderr)
            print('%s model win probability: %.2f' %
                  (['First', 'Second'][(games ^ move_index) & 1],
                   (value + 1) / 2),
                  file=sys.stderr)
            if games > 0:
                print('Win ratio %.2f ± %.2f (%d games)' %
                      (win_ratio, uncertainty, games),
                      file=sys.stderr)
            move_index += 1
        games += 1
        # NOTE(review): presumably this parity test identifies a win for
        # the first model -- confirm against the move ordering above.
        if games & 1 == move_index & 1:
            first_player_wins += 1
        win_ratio = float(first_player_wins) / games
        # Standard error of a sample proportion: sqrt(p * (1 - p) / n).
        uncertainty = math.sqrt(win_ratio * (1 - win_ratio) / games)

        ratios.append(win_ratio)

    return ratios