Ejemplo n.º 1
0
 def winner(self):
     """Report which side, if any, has won on ``self.board``.

     Returns:
         'white' when the ``winner`` helper is truthy for the board as
         stored, 'black' when it is truthy for the flipped board, and
         'none' otherwise.
     """
     # NOTE(review): the bare name ``winner`` used below is resolved outside
     # this method (presumably a module-level board helper, not this
     # method itself) -- confirm against the rest of the file.
     board = self.board
     if winner(board):
         return 'white'
     return 'black' if winner(flip(board)) else 'none'
Ejemplo n.º 2
0
def generate(config, model_file, output_file):
    """Play batched self-play games forever, appending a record per finished game.

    The model is loaded from ``model_file`` and ``config.batch_size`` games
    are advanced in lock-step: each iteration predicts priors for every
    board, samples one move per game and applies it. Whenever a game has a
    winner, its result is written to ``output_file``, progress is printed to
    stderr and that slot is reset to a fresh board.

    Args:
        config: object providing ``size`` and ``batch_size``.
        model_file: passed to ``load_model``.
        output_file: path of the record file, opened in binary append mode.

    This function never returns normally; the main loop has no exit.
    """
    model = load_model(model_file)
    with open(output_file, 'ab') as fout:
        # Drop any partially written trailing record. In append mode every
        # write lands at the end of the file regardless of the seek
        # position, so seeking back is not enough: the file itself must be
        # truncated or all later records would be misaligned.
        file_pos = fout.tell()
        aligned_pos = file_pos - file_pos % record_size(config.size)
        fout.truncate(aligned_pos)
        fout.seek(aligned_pos)

        samples = 0
        start_time = time.time()
        game_boards = numpy.array(
            [new_board(config.size) for _ in range(config.batch_size)])
        game_moves = [[] for _ in range(config.batch_size)]
        while True:
            _values, priors = model.predict(game_boards)
            priors = numpy.reshape(priors, (-1, config.size, config.size))
            for i in range(config.batch_size):
                probs = fix_probabilities(game_boards[i], priors[i])
                move = sample_move(probs)
                game_moves[i].append(move)
                game_boards[i] = make_move(game_boards[i], move)
                if not winner(game_boards[i]):
                    continue

                # Game finished: persist it and restart this slot.
                samples += 1
                board, won, visits = game_result(config, model, game_moves[i])
                write_record(fout, board, won, visits)
                # Flush per record so an interruption loses at most the
                # record currently being written.
                fout.flush()
                print_board(board, file=sys.stderr)
                print('Games: %d, Time per game: %.2fs' % (samples, (time.time() - start_time) / samples), file=sys.stderr)
                game_boards[i] = new_board(config.size)
                game_moves[i] = []
Ejemplo n.º 3
0
    async def visit(self, config, predictor, board, is_first_move):
        """Perform one search visit through this node and return its value.

        While ``self.visits`` is still 1 the win status of ``board`` is
        stored in ``self.is_won``. A won node releases the predictor, bumps
        its visit count, subtracts 1 from ``self.value`` and returns -1.
        Otherwise, still at ``self.visits == 1``, one ``Edge`` is created for
        every cell that is zero in both ``board[0]`` and ``board[1]`` and the
        priors are dropped. An edge is then sampled from the
        temperature-adjusted softmax of the edge priorities and visited; the
        negated result is added to ``self.value`` and returned.
        """
        untouched = self.visits == 1
        if untouched:
            self.is_won = winner(board)

        if self.is_won:
            predictor.release()
            self.visits += 1
            self.value -= 1
            return -1

        if untouched:
            size = board.shape[1]
            self.edges = [
                Edge(config, self.priors[x, y], (x, y), self.t)
                for x in range(size)
                for y in range(size)
                if board[0, x, y] == 0 and board[1, x, y] == 0
            ]
            # Priors now live on the edges; release the array.
            self.priors = None

        self.visits += 1
        root_of_visits = math.sqrt(self.visits)

        # Temperature is applied to the softmax of the edge priorities.
        scores = [edge.priority(root_of_visits) for edge in self.edges]
        tempered = temperature(softmax(scores), self.t)

        candidates = range(len(tempered))
        chosen_index = numpy.random.choice(candidates, p=tempered)
        chosen_edge = self.edges[chosen_index]

        child_value = await chosen_edge.visit(
            config, predictor, board, is_first_move)
        negated = -child_value
        self.value += negated
        return negated
Ejemplo n.º 4
0
 def genmove(self, player):
     """Search for a move for ``player``, play it, and return it formatted.

     Args:
         player: 'black' or 'white'.

     Returns:
         The result of ``write_move`` for the chosen move. For white the
         move is passed through ``flip_move`` first.

     Raises:
         GtpException: if either side has already won, or if ``player`` is
             neither 'black' nor 'white'. In both cases the board and the
             history are left untouched.
     """
     if winner(self.board) or winner(flip(self.board)):
         raise GtpException('Game is over')
     if player not in ('black', 'white'):
         raise GtpException('Player is invalid')

     # Capture this BEFORE pushing the current position: once the board has
     # been appended the history is never empty, so evaluating
     # ``not self.history`` afterwards would always yield False.
     is_first_move = not self.history
     self.history.append(self.board)

     # White searches on the flipped board; black on the board as stored.
     search_board = self.board if player == 'black' else flip(self.board)

     # TODO: reuse previous calculations from the MCTS
     predictor = TreeSearchPredictor(self.config.search_config, self.model, search_board, is_first_move)
     predictor.run(self.config.iterations)
     value, probabilities = predictor.predict()
     move = best_move(probabilities)

     if player == 'black':
         self.board = flip(make_move(search_board, move))
     else:
         self.board = make_move(search_board, move)
         # The move was found on the flipped board; convert it back.
         move = flip_move(move)

     print('Estimated value: %.2f' % value, file=sys.stderr)
     return write_move(move)
Ejemplo n.º 5
0
def compare(config, model1, model2, t, T, num_games):
    """Play ``num_games`` games between two models and track a running ratio.

    Each game builds one ``TreeSearchPredictor`` per model on a fresh board
    (the first one additionally receives ``t`` and ``T``). Every chosen move
    is applied to both predictors so their boards stay in sync.

    Args:
        config: object providing ``search_config``, ``size`` and
            ``iterations``.
        model1: model for the first predictor (constructed with ``t``, ``T``).
        model2: model for the second predictor.
        t: forwarded to the first predictor's constructor.
        T: forwarded to the first predictor's constructor and also passed to
            ``temperature`` for moves where the game and move parities match.
        num_games: number of games to play.

    Returns:
        A list with one entry per game: ``first_player_wins / games`` as it
        stood after that game. Empty when ``num_games`` is 0.
    """
    games = 0
    first_player_wins = 0
    ratios = []

    for _ in range(num_games):
        move_index = 0
        predictors = [
            TreeSearchPredictor(config.search_config, model1,
                                new_board(config.size), True, t, T),
            TreeSearchPredictor(config.search_config, model2,
                                new_board(config.size), True)
        ]

        while not winner(predictors[0].board):
            # The opening move always comes from the second predictor; after
            # that the mover alternates with the parity of game and move.
            if move_index == 0:
                mover = predictors[1]
            else:
                mover = predictors[(games ^ move_index) & 1]
            mover.run(config.iterations)
            _value, probabilities = mover.predict()

            if (games & 1) == (move_index & 1):
                probabilities = temperature(probabilities, T)

            move = refined_move(probabilities)

            # Keep both search trees on the same position.
            for each in predictors:
                each.make_move(move)

            move_index += 1

        games += 1
        if (games & 1) == (move_index & 1):
            first_player_wins += 1
        ratios.append(float(first_player_wins) / games)

    return ratios
Ejemplo n.º 6
0
def compare(config, num_games, temp, Temp, name):
    """Play ``num_games`` games of the alpha agent against wolve.

    The alpha agent moves first in every game; each of its moves is sent to
    wolve as a "black" move, and wolve answers with ``genmove("white")``.
    Progress and totals are printed to stdout.

    Args:
        config: object providing ``search_config``, ``size`` and
            ``iterations``.
        num_games: number of games to play.
        temp: forwarded to the ``TreeSearchPredictor`` constructor.
        Temp: forwarded to the ``TreeSearchPredictor`` constructor.
        name: label used for the alpha agent in printed output.

    Returns:
        ``alpha_wins / num_games``.

    Raises:
        ZeroDivisionError: if ``num_games`` is 0.

    Relies on ``model``, ``wolve``, ``num_to_letter`` and ``letter_to_num``
    being available from the enclosing scope.
    """
    print('\nplaying with', name, 'with vals:', temp, Temp)
    alpha_wins = 0
    wolve_wins = 0

    for i in range(num_games):
        alpha_agent = TreeSearchPredictor(config.search_config, model,
                                          new_board(config.size), True, temp,
                                          Temp)

        # Make sure wolve starts from a clear board.
        wolve.clear_board()
        print('its game number: ', i + 1)

        while not winner(alpha_agent.board):
            # Alpha's turn.
            alpha_agent.run(config.iterations)
            _value, probabilities = alpha_agent.predict()
            probabilities = fix_probabilities(alpha_agent.board, probabilities)
            alpha_move = best_move(probabilities)
            alpha_agent.make_move(alpha_move)

            # Mirror alpha's move into wolve as "<letter><1-based number>".
            letter, number = alpha_move
            wolve.insert_move(
                "black", str(num_to_letter[letter]) + str(number + 1))
            if winner(alpha_agent.board):
                print("alpha wins!!!")
                alpha_wins += 1
                break

            # Wolve's turn.
            wolve_move = wolve.genmove("white")
            letter = letter_to_num[wolve_move[0]]
            number = int(wolve_move[1:]) - 1
            # NOTE(review): the tuple is (number, letter), the reverse of the
            # (letter, number) order used for alpha's own move -- presumably
            # because the agent's board is flipped after each move; confirm.
            alpha_agent.make_move((number, letter))
            if winner(alpha_agent.board):
                print("wolve wins!!!")
                wolve_wins += 1
                break

        print(name, 'won', alpha_wins, 'out of', i + 1)
        print('wolve won', wolve_wins, 'out of', i + 1)

    print(name, 'won', alpha_wins, 'times out of', num_games, 'games')
    print('wolve won', wolve_wins, 'times out of', num_games, 'games')
    return alpha_wins / num_games