Code Example #1
File: test_bed.py  Project: samdjstephens/reinforce
import random
from typing import Callable

# noop_print, SarsaAgent, TicTacToeGame, and Environment come from other
# modules in the samdjstephens/reinforce project.


class TestBed(object):
    def __init__(self, logger: Callable = noop_print, random_seed: int = 54):
        self.winners = []
        self.logger = logger
        self.agent = SarsaAgent(1)
        self.opponent = SarsaAgent(-1)
        self.game = TicTacToeGame()
        self.env = Environment(self.game, self.opponent)
        self._random = random.Random(random_seed)

    def play_episodes(self, n=100):
        print('Episode: ')
        for i in range(n):
            if i % 100 == 0:  # print progress every 100 episodes
                print(f'{i+1}', end=' ')
            winner = self.play_episode()
            self.winners.append(winner)

    def play_episode(self):
        if self._random.random() < 0.5:
            # Let the opponent start 50% of the time
            self.env.opponent_play()

        state = self.env.state_rep()
        action = self.agent.act(state)

        for turn in range(1, 10):  # at most 9 moves in total; the agent takes at most 5
            reward = self.env.interact(action)
            new_state = self.env.state_rep()

            # Pick the next action from the currently legal moves, then apply
            # the SARSA update with the (s, a, r, s', a') transition.
            new_action = self.agent.act(new_state,
                                        self.game.available_positions())
            self.agent.get_feedback(state, action, reward, new_state,
                                    new_action)
            state = new_state
            action = new_action
            if self.env.is_episode_complete:
                break

        self.logger(f"Episode complete. {turn} turns taken.", end=' ')

        result = self.game.winner()

        if result:
            self.logger(f"{self.game.repr_marker(result)} won!")
        else:
            self.logger("It was a draw")
        self.logger(self.env, end='\n\n\n\n')
        self.reset()
        return result

    def reset(self):
        self.game.reset()
        self.agent.reset()
        self.opponent.reset()
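
A minimal driver for TestBed (a sketch, assuming only the constructor defaults shown above; winners collects 1, -1, or a falsy draw marker per episode) could tally results like this:

bed = TestBed()
bed.play_episodes(n=1000)
agent_wins = bed.winners.count(1)        # episodes won by the agent (marker 1)
opponent_wins = bed.winners.count(-1)    # episodes won by the opponent (marker -1)
draws = len(bed.winners) - agent_wins - opponent_wins
print(f"agent: {agent_wins}, opponent: {opponent_wins}, draws: {draws}")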
Code Example #2
    def test_check_game_over_1(self):
        """Test case for the check_game_over function.

        Test for game over with a win.
        """
        game = TicTacToeGame()
        game.state = [[1, 0, -1], [1, 0, -1], [1, 0, -1]]
        game_over, value = game.check_game_over(1)

        self.assertEqual(game_over, True)
        self.assertEqual(value, 1)
Code Example #3
    def test_check_game_over_4(self):
        """Test case for the check_game_over function.

        Test for game not over.
        """
        game = TicTacToeGame()
        game.state = [[-1, 0, 0], [0, -1, 0], [0, 1, 0]]
        game_over, value = game.check_game_over(-1)

        self.assertEqual(game_over, False)
        self.assertEqual(value, 0)
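
Together these tests pin down the contract of check_game_over: it returns (True, player) when player has completed a row, column, or diagonal, and (False, 0) while play continues. A hypothetical implementation satisfying that contract (a sketch, not the project's actual code; it additionally reports a full board as a draw with value 0) might be:

    def check_game_over(self, player):
        """Return (game_over, value); value is `player` on a win, else 0."""
        lines = list(self.state)                                     # three rows
        lines += [[row[c] for row in self.state] for c in range(3)]  # three columns
        lines.append([self.state[i][i] for i in range(3)])           # main diagonal
        lines.append([self.state[i][2 - i] for i in range(3)])       # anti-diagonal
        if any(all(cell == player for cell in line) for line in lines):
            return True, player
        if all(cell != 0 for row in self.state for cell in row):     # full board: draw
            return True, 0
        return False, 0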
Code Example #4
    CFG.dirichlet_alpha = arguments.dirichlet_alpha
    CFG.epsilon = arguments.epsilon
    CFG.model_directory = arguments.model_directory
    CFG.num_eval_games = arguments.num_eval_games
    CFG.eval_win_rate = arguments.eval_win_rate
    CFG.load_model = arguments.load_model
    CFG.human_play = arguments.human_play
    CFG.resnet_blocks = arguments.resnet_blocks
    CFG.record_loss = arguments.record_loss
    CFG.loss_file = arguments.loss_file
    CFG.game = arguments.game

    # Initialize the game object with the chosen game.
    game = None
    if CFG.game == 0:
        game = TicTacToeGame()
    elif CFG.game == 1:
        game = OthelloGame()
    elif CFG.game == 2:
        game = ConnectFourGame()
    elif CFG.game == 3:
        game = RubiksCubeGame()
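
    # An equivalent table-driven dispatch (a hypothetical sketch, not part of
    # this project) would keep the flag-to-game mapping in one place:
    #     GAMES = {0: TicTacToeGame, 1: OthelloGame,
    #              2: ConnectFourGame, 3: RubiksCubeGame}
    #     game_cls = GAMES.get(CFG.game)
    #     game = game_cls() if game_cls is not None else None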

    net = NeuralNetworkWrapper(game)

    # Initialize the network with the best model.
    if CFG.load_model:
        file_path = os.path.join(CFG.model_directory, "best_model.meta")
        if os.path.exists(file_path):
            net.load_model("best_model")
        else: