Example #1
 def get_next_state(self, state, action):
     board = np.copy(state)
     OthelloGame.flip_board_squares(board, OthelloPlayer.BLACK, *action)
     if OthelloGame.has_player_actions_on_board(board, OthelloPlayer.WHITE):
         # Invert board to keep using BLACK perspective
         board = OthelloGame.invert_board(board)
     return board
Example #2
def execute_episode(board_size, neural_network, degree_exploration,
                    num_simulations, policy_temperature, e_greedy):
    examples = []

    game = OthelloGame(board_size)

    mcts = OthelloMCTS(board_size, neural_network, degree_exploration)

    if neural_network.network_type == NeuralNets.ONN:
        board_view_type = BoardView.TWO_CHANNELS
    elif neural_network.network_type == NeuralNets.BNN:
        board_view_type = BoardView.ONE_CHANNEL

    while not game.has_finished():
        state = game.board(BoardView.TWO_CHANNELS)

        for _ in range(num_simulations):
            mcts.simulate(state, game.current_player)

        if game.current_player == OthelloPlayer.WHITE:
            state = OthelloGame.invert_board(state)

        policy = mcts.get_policy_action_probabilities(state,
                                                      policy_temperature)

        # e-greedy
        coin = random.random()

        if coin <= e_greedy:
            action = np.argwhere(policy == policy.max())[0]
        else:
            action = mcts.get_state_actions(state)[np.random.choice(
                len(mcts.get_state_actions(state)))]

        action_choosed = np.zeros((board_size, board_size))
        action_choosed[action[0]][action[1]] = 1

        for board_example, policy_example in training_example_symmetries(
                game.board(board_view_type), action_choosed):
            example = board_example, policy_example, game.current_player
            examples.append(example)

        game.play(*action)

    winner, winner_points = game.get_winning_player()
    logging.info(
        f'Episode finished: The winner obtained {winner_points} points.')

    return [(state, policy, 1 if winner == player else -1)
            for state, policy, player in examples]
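
A minimal driver sketch of how execute_episode could be looped to collect self-play data. The OthelloNN wrapper name, the hyperparameter values, and the episode count below are assumptions for illustration, not part of the example above.

# Hypothetical self-play loop; every concrete value here is an assumed placeholder.
neural_network = OthelloNN(board_size=6)  # assumed network wrapper exposing network_type
training_examples = []
for _ in range(10):  # arbitrary number of episodes
    training_examples += execute_episode(6, neural_network,
                                          degree_exploration=1.0,
                                          num_simulations=25,
                                          policy_temperature=1.0,
                                          e_greedy=0.9)
# Each entry is (board, policy target, +1/-1 outcome from that player's perspective).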
Example #3
 def play(self):
     move_points = {}
     possible_moves = tuple(self.game.get_valid_actions())
     points_before = self.game.get_players_points()[self.game.current_player]

     for move in possible_moves:
         # Assuming a single-channel board view here for counting discs
         state = np.copy(self.game.board(BoardView.ONE_CHANNEL))
         OthelloGame.flip_board_squares(state, self.game.current_player, *move)
         points = OthelloGame.get_board_players_points(state)[OthelloPlayer.BLACK] - points_before
         move_points[move] = points

     greedy_move = max(move_points, key=move_points.get)
     self.game.play(*greedy_move)
Example #4
def evaluate_neural_network(board_size, total_iterations, neural_network,
                            num_simulations, degree_exploration, agent_class,
                            agent_arguments):
    net_wins = 0

    logging.info(f'Neural Network Evaluation: Started')

    for iteration in range(1, total_iterations + 1):

        game = OthelloGame(board_size)

        nn_agent = NeuralNetworkOthelloAgent(game, neural_network,
                                             num_simulations,
                                             degree_exploration)
        evaluation_agent = agent_class(game, *agent_arguments)

        agents = [evaluation_agent, nn_agent]
        random.shuffle(agents)

        agent_winner = duel_between_agents(game, *agents)

        if agent_winner is nn_agent:
            net_wins += 1
            logging.info(f'Neural Network Evaluation: Network won')
        else:
            logging.info(f'Neural Network Evaluation: Network lost')

    return net_wins
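
A short sketch, under assumed arguments, of turning the returned win count into a win rate; RandomOthelloAgent and the numeric values are placeholders, not names confirmed by the example above.

# Hypothetical call: the agent class and all hyperparameter values are assumed.
total_games = 20
wins = evaluate_neural_network(6, total_games, neural_network,
                               num_simulations=25,
                               degree_exploration=1.0,
                               agent_class=RandomOthelloAgent,
                               agent_arguments=[])
logging.info(f'Network won {wins}/{total_games} games ({wins / total_games:.0%})')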
Example #5
 def _neural_network_predict(self, state):
     hash_ = hash_ndarray(state)
     if hash_ not in self._predict_cache:
         if self._board_view_type == BoardView.ONE_CHANNEL:
             state = OthelloGame.convert_to_one_channel_board(state)
         self._predict_cache[hash_] = self._neural_network.predict(state)
     return self._predict_cache[hash_]
Example #6
def duel_between_neural_networks(board_size, neural_network_1,
                                 neural_network_2, degree_exploration,
                                 num_simulations):
    game = OthelloGame(board_size)

    nn_1_agent = NeuralNetworkOthelloAgent(game, neural_network_1,
                                           num_simulations, degree_exploration)
    nn_2_agent = NeuralNetworkOthelloAgent(game, neural_network_2,
                                           num_simulations, degree_exploration)

    agents = {nn_1_agent: neural_network_1, nn_2_agent: neural_network_2}

    agent_winner = duel_between_agents(game, nn_1_agent, nn_2_agent)

    return 0 if agents[agent_winner] is neural_network_1 else 1
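
As a follow-up, a minimal sketch of how the 0/1 return value might drive a best-versus-candidate selection; the variable names, duel count, and majority rule are all assumptions.

# Hypothetical model selection: keep the candidate only if it wins most duels.
candidate_wins = 0
num_duels = 6  # arbitrary
for _ in range(num_duels):
    result = duel_between_neural_networks(6, best_network, candidate_network,
                                          degree_exploration=1.0,
                                          num_simulations=25)
    if result == 1:  # 1 means neural_network_2 (the candidate) won
        candidate_wins += 1
if candidate_wins > num_duels // 2:
    best_network = candidate_network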
Example #7
    def play(self):
        state = self.game.board(BoardView.TWO_CHANNELS)
        for _ in range(self.num_simulations):
            self.mcts.simulate(state, self.game.current_player)

        if self.game.current_player == OthelloPlayer.WHITE:
            state = OthelloGame.invert_board(state)
        
        if self.neural_network.network_type is NeuralNets.ONN:
            action_probabilities = self.mcts.get_policy_action_probabilities(state, self.temperature)
        else:
            action_probabilities = self.mcts.get_policy_action_probabilities(
                self.game.board(), self.temperature)

        valid_actions = self.game.get_valid_actions()
        best_action = max(valid_actions, key=lambda position: action_probabilities[tuple(position)])
        self.game.play(*best_action)
Example #8
from Trainer import Trainer
from Othello import OthelloGame
from nnet import ConvNet

if __name__ == "__main__":
    game = OthelloGame(6)
    nnet = ConvNet(game)
    trainer = Trainer(game, nnet)
    trainer.train()
Example #9
 def get_state_actions(self, state):
     return [
         tuple(a) for a in OthelloGame.get_player_valid_actions(
             state, OthelloPlayer.BLACK)
     ]
Example #10
 def get_state_reward(self, state):
     return OthelloGame.get_board_winning_player(state)[0].value
Example #11
 def is_terminal_state(self, state):
     return OthelloGame.has_board_finished(state)
Example #12
 def simulate(self, state, player):
     board = np.copy(state)
     if player is OthelloPlayer.WHITE:
         board = OthelloGame.invert_board(board)
     return super().simulate(board)
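
Examples #9 through #12 are the hooks that keep OthelloMCTS working from BLACK's perspective regardless of the player to move. A brief usage sketch, assuming the constructor and simulation count shown in Example #2; the concrete values are placeholders.

# Hypothetical usage of the OthelloMCTS interface; values are assumed.
mcts = OthelloMCTS(6, neural_network, 1.0)
state = game.board(BoardView.TWO_CHANNELS)
for _ in range(25):
    mcts.simulate(state, game.current_player)
if game.current_player == OthelloPlayer.WHITE:
    state = OthelloGame.invert_board(state)  # keep BLACK's perspective, as in Example #2
policy = mcts.get_policy_action_probabilities(state, 1.0)  # temperature assumed to be 1.0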