Example #1
    def test_simple_game(self):
        layout = """
        ##########
        #        #
        #0  ..  1#
        ##########
        """
        server = SimpleServer(
            layout_string=layout,
            rounds=5,
            players=2,
            bind_addrs=("ipc:///tmp/pelita-testplayer1-%s" % uuid.uuid4(),
                        "ipc:///tmp/pelita-testplayer2-%s" % uuid.uuid4()))

        for bind_address in server.bind_addresses:
            self.assertTrue(bind_address.startswith("ipc://"))

        client1_address = server.bind_addresses[0]
        client2_address = server.bind_addresses[1]

        client1 = SimpleClient(SimpleTeam("team1", RandomPlayer()),
                               address=client1_address)
        client2 = SimpleClient(SimpleTeam("team2", RandomPlayer()),
                               address=client2_address)

        client1.autoplay_process()
        client2.autoplay_process()
        server.run()
        server.shutdown()
Example #2
def monte_carlo_eval(original_game,
                     player_number,
                     main_player=RandomPlayer(),
                     rewards=(1, -1, .5),
                     simulation_amount=100,
                     depth=0,
                     opponent=RandomPlayer()):
    '''Returns a MonteCarloEvaluation object.
    Plays random games from the given game position and averages the results.

    original_game is the game to simulate from.
    player_number is the number of the player from whose perspective the game is evaluated.
    main_player is the Player object used to simulate the moves of the player with player_number.
    rewards is a tuple (or list): (points for winning, points for losing, points for tying).
    simulation_amount is how many games to simulate per possible game at the specified depth.
    depth is how many layers down to expand before simulating.
    opponent is the Player object used to simulate the moves of the player other than player_number.
    '''

    if depth == 0:
        value = 0
        for _ in range(simulation_amount):
            game = original_game.get_copy()
            # set up the players
            players = [None, None]
            players[player_number] = main_player
            players[original_game.get_other_player(player_number)] = opponent

            # play out a game
            while game.who_won() is None:
                players[game.active_player].make_move(game)

            # update the value
            value += simple_eval(game, player_number, rewards).value

        # average the games' scores
        return MonteCarloEvaluation(value / simulation_amount,
                                    simulation_amount)
    else:
        winner = original_game.who_won()
        if winner is None:
            # list of MonteCarloEvaluation objects for each game
            lower_level = [
                monte_carlo_eval(game, player_number, main_player, rewards,
                                 simulation_amount, depth - 1, opponent)
                for game in original_game.get_next_level()
            ]
            # average the values across the same level
            return MonteCarloEvaluation(
                sum([e.value for e in lower_level]) / len(lower_level),
                sum([e.simulations for e in lower_level]))
        else:
            # game is finished, so use winner_eval
            return MonteCarloEvaluation(
                winner_eval(winner, player_number, rewards).value,
                simulation_amount)
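
A rough usage sketch of the evaluator above. The ConnectFour class and the module layout are borrowed from Example #27 further down, so treat the import paths as assumptions rather than the project's confirmed API.

# Usage sketch; module names are assumptions (compare Example #27).
from connect_four import ConnectFour
from players import RandomPlayer

game = ConnectFour()
# Score the opening position from player 0's point of view: expand one move
# ahead, then run 50 random playouts from each resulting position.
evaluation = monte_carlo_eval(game, player_number=0,
                              simulation_amount=50, depth=1)
print(evaluation.value, evaluation.simulations)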
Example #3
    def test_random_seeds(self):
        test_layout = (""" ################
            #              #
            #              #
            #              #
            #   0      1   #
            #   2      3   #
            #              #
            #              #
            #.            .#
            ################ """)
        players_a = [RandomPlayer() for _ in range(4)]

        team_1 = [
            SimpleTeam(players_a[0], players_a[2]),
            SimpleTeam(players_a[1], players_a[3])
        ]
        gm1 = GameMaster(test_layout, team_1, 4, 5, seed=20)
        gm1.set_initial()
        random_numbers_a = [
            player.rnd.randint(0, 10000) for player in players_a
        ]
        # check that each player has a different seed (if randomness allows)
        self.assertEqual(len(set(random_numbers_a)), 4,
                         "Probably not all player seeds were unique.")

        players_b = [RandomPlayer() for _ in range(4)]

        team_2 = [
            SimpleTeam(players_b[0], players_b[2]),
            SimpleTeam(players_b[1], players_b[3])
        ]
        gm2 = GameMaster(test_layout, team_2, 4, 5, seed=20)
        gm2.set_initial()
        random_numbers_b = [
            player.rnd.randint(0, 10000) for player in players_b
        ]
        self.assertEqual(random_numbers_a, random_numbers_b)

        players_c = [RandomPlayer() for _ in range(4)]

        team_3 = [
            SimpleTeam(players_c[0], players_c[2]),
            SimpleTeam(players_c[1], players_c[3])
        ]
        gm3 = GameMaster(test_layout, team_3, 4, 5, seed=200)
        gm3.set_initial()
        random_numbers_c = [
            player.rnd.randint(0, 10000) for player in players_c
        ]

        self.assertNotEqual(random_numbers_a, random_numbers_c)
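
The assertions above rest on a basic property of seeded pseudo-random generators: the same seed reproduces the same stream, while a different seed almost certainly does not. A tiny standalone illustration with Python's random module:

import random

a = random.Random(20)
b = random.Random(20)
c = random.Random(200)

seq_a = [a.randint(0, 10000) for _ in range(4)]
seq_b = [b.randint(0, 10000) for _ in range(4)]
seq_c = [c.randint(0, 10000) for _ in range(4)]

assert seq_a == seq_b   # same seed, same stream
assert seq_a != seq_c   # different seed, (almost certainly) a different stream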
Example #4
def run():
    print("WELCOME TO TIC-TAC-TOE!")
    print("X------------------------------------------------------------------------------------------------X")
    print("You can choose player 1 or player 2. They can be Human(H), Random Computer Moves (R) or AI (AI). Player 1 uses the X token "
        "and player 2 uses O. Both players are Human by default.")
    print("The Human Player is controlled by you. The AI is unbeatable and chooses its own moves. Random chooses any move on the board"
        " at random.")
    print("X------------------------------------------------------------------------------------------------X")

    p1 = input("Choose Player 1 (H/AI/R): ").strip().lower()
    p2 = input("Choose Player 2 (H/AI/R): ").strip().lower()

    if p1 == "ai":
        x_player = AIPlayer("X")
    elif p1 == "h":
        x_player = HumanPlayer("X")
    elif p1 == "r":
        x_player = RandomPlayer("X")
    else:
        x_player = HumanPlayer("X")

    if p2 == "ai":
        o_player = AIPlayer("O")
    elif p2 == "h":
        o_player = HumanPlayer("O")
    elif p2 == "r":
        o_player = RandomPlayer("O")
    else:
        o_player = HumanPlayer("O")
    
    print("X------------------------------------------------------------------------------------------------X\n")

    print(f"Player 1 (X) is {p1}")
    print(f"Player 2 (O) is {p2}")

    ctd = 5
    while ctd:
        try:
            sys.stdout.write(f"\rStarting in {ctd}...")
            sys.stdout.flush()
            time.sleep(1)
            ctd -= 1
        except KeyboardInterrupt:
            break
    
    print("\n")
    game = TicTacToe()

    os.system("clear")
    play(game, x_player, o_player, print_game=True)
Example #5
def train(learning_curve: list[float], output_file: str = 'data/saved_trees/AIBasic.csv') -> None:
    results = []
    d_tree = DecisionTree(move=-1, turn='yellow', subtrees=[])
    random_player = RandomPlayer()
    for t in learning_curve:
        ai = AIPlayerBasic(d_tree, t)
        moves_played, ai_win = run_game(ai, random_player)
        if ai_win:
            moves_played.append(1)
        else:
            moves_played.append(0)

        d_tree.add_game(moves_played)

        results.append(ai_win)

    write_to_file(d_tree, output_file)

    total_win_percent = len([1 for result in results if result])/len(results)

    recent_wins = 0
    if len(results) > 100:
        flipped_results = results[::-1]
        for i in range(0, 100):
            if flipped_results[i]:
                recent_wins += 1

    recent_win_percent = recent_wins/100

    print('Recent Win Percentage:', recent_win_percent)
    print('Total Win Percentage:', total_win_percent)
Example #6
    def test_time_spent(self):
        outer = self

        class TimeSpendingPlayer(AbstractPlayer):
            def get_move(self):
                time_spent_begin = self.time_spent()

                sleep_time = 0.1
                time.sleep(sleep_time)

                time_spent_end = self.time_spent()

                outer.assertTrue(0 <= time_spent_begin < time_spent_end)

                time_diff = abs(time_spent_begin + sleep_time - time_spent_end)
                delta = 0.05
                outer.assertTrue(time_diff < delta)
                return stop

        test_layout = (""" ############
            #0 #.  .# 1#
            ############ """)
        team = [SimpleTeam(TimeSpendingPlayer()), SimpleTeam(RandomPlayer())]
        gm = GameMaster(test_layout, team, 2, 1)
        gm.play()
Example #7
def build_players() -> list:
    players = [
        ImpulsivePlayer(),
        DemandingPlayer(),
        CautiousPlayer(),
        RandomPlayer()
    ]
    return players
Example #8
    def __init__(self,
                 pe_depth,
                 mc_simulation_amount,
                 mc_initial_depth,
                 mc_play_depth=-1,
                 mc_evaluator=WinnerRewardEvaluator((1, -1, .5)),
                 pe_rewards=(2, -2, .9, 0, 0),
                 main_player=RandomPlayer(),
                 opponent=RandomPlayer()):
        '''
        mc is short for "MonteCarlo"
        pe is short for "Position Evaluator"
        pe_depth: how many moves the position evaluator should look ahead (should be at least 1)
        mc_simulation_amount: how many times the monte carlo evaluator should simulate each end position
        mc_initial_depth: how many moves the MonteCarlo simulation should look ahead (should be at least 1)
        mc_play_depth: how many moves the MonteCarlo simulation should simulate before evaluating (after looking ahead)
        ...(should be -1 to play out the entire game or at least 1 to evaluate unfinished games)
        mc_evaluator: evaluator that scores game positions
        pe_rewards: the position evaluator rewards for (win, loss, you can tie, opponent can tie, undetermined)
        main_player: the player who is making the moves in the AdvisedMonteCarloPlayer's position when simulating games
        opponent: the player who is playing against the main_player when simulating games
        '''
        self.pe_depth = pe_depth
        self.mc_simulation_amount = mc_simulation_amount
        self.mc_initial_depth = mc_initial_depth
        self.mc_play_depth = mc_play_depth
        self.mc_evaluator = mc_evaluator
        self.pe_rewards = pe_rewards
        self.sim_main_player = main_player
        self.sim_opponent = opponent

        # the position evaluation function
        self.pe_func = lambda game, player_number: position_eval(
            game, player_number, self.pe_depth - 1, self.pe_rewards)

        # the monte carlo evaluation function
        self.mc_func = lambda game, player_number: monte_carlo_eval(
            game,
            player_number,
            self.mc_evaluator,
            simulation_amount=self.mc_simulation_amount,
            initial_depth=self.mc_initial_depth,
            play_depth=self.mc_play_depth,
            main_player=self.sim_main_player,
            opponent=self.sim_opponent)
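
The docstring above ties each constructor argument to a stage of the combined evaluator. A construction sketch follows; the class name AdvisedMonteCarloPlayer is inferred from the docstring's wording, and the import path is an assumption.

# Sketch only: class and module names are assumptions inferred from the docstring.
from players import AdvisedMonteCarloPlayer

player = AdvisedMonteCarloPlayer(
    pe_depth=2,               # position evaluator looks two moves ahead
    mc_simulation_amount=50,  # 50 playouts per evaluated end position
    mc_initial_depth=1,       # Monte Carlo look-ahead of one move
    mc_play_depth=-1)         # play each simulation out to the end of the game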
Example #9
def unsure_monte_carlo_eval(game,
                            player_number,
                            unsure_rewards=[1, -1, .5],
                            sure_rewards=[2, -2, 1.5],
                            main_player=RandomPlayer(),
                            simulation_amount=5,
                            depth=0,
                            opponent=RandomPlayer()):
    '''Evaluates a game from player 0's perspective'''
    winner = game.who_won()
    if winner is None:
        # game is not complete
        return monte_carlo_eval(game, player_number, main_player,
                                unsure_rewards, simulation_amount, depth,
                                opponent)
    else:
        # game is complete
        return Evaluation(sure_rewards[winner])
Example #10
    def test_demo_players(self):
        test_layout = (""" ############
            #0 #.  .# 1#
            ############ """)
        team = [SimpleTeam(SpeakingPlayer()), SimpleTeam(RandomPlayer())]
        gm = GameMaster(test_layout, team, 2, 1)
        gm.play()
        self.assertTrue(gm.game_state["bot_talk"][0].startswith("Going"))
        self.assertEqual(gm.game_state["bot_talk"][1], "")
Example #11
def run_randoms(n: int, output_file: str = 'data/saved_trees/full_tree.csv') -> None:
    start_time = time.time()
    red = RandomPlayer()
    yellow = RandomPlayer()

    d_tree = DecisionTree(move=-1, subtrees=[], turn='yellow')

    for _ in range(0, n):
        results, red_win = run_game(red, yellow)

        if red_win:
            results.append(1)
        else:
            results.append(0)
        d_tree.add_game(results)

    write_to_file(d_tree, output_file)
    # print(d_tree)
    print('Final Run Time:', time.time() - start_time)
Example #12
def test_can_create_player():
    players = ["red", "black", "green", "yellow", "blue", "grey"]
    game = Game(players)
    player = RandomPlayer(players[0], game)
    player = NonPlanningProgressMaximizer(players[1], game)
    player = PlanningProgressMaximizer(players[2], game, {
        'max_depth': 5,
    })
    player = SingleMoveProgressMaximizer(players[3], game)
    player = RandomSingleMovePlayer(players[3], game)
Example #13
    def get_move(self, board):
        moves = board.available_moves()
        if moves:
            for move in moves:
                if THandPlayer.next_move_winner(board, move, self.mark):
                    return move
                elif THandPlayer.next_move_winner(board, move,
                                                  self.opponent_mark):
                    return move
            else:
                # No winning or blocking move found: fall back to a random move.
                return RandomPlayer.get_move(board)
Example #14
    def __init__(self,
                 mc_simulation_amount,
                 mc_depth,
                 pe_depth,
                 mc_rewards=(1, -1, .5),
                 pe_rewards=(2, -2, .9, 0, 0),
                 main_player=RandomPlayer(),
                 opponent=RandomPlayer()):
        '''
        mc is short for "MonteCarlo"
        mc_simulation_amount: how many times the monte carlo evaluator should simulate each end position
        mc_depth: how many moves the MonteCarlo simulation should look ahead (should be at least 1)
        pe_depth: how many moves the position evaluator should look ahead (should be at least 1)
        mc_rewards: the monte carlo evaluation rewards for (win, loss, tie)
        pe_rewards: the position evaluator rewards for (win, loss, you can tie, opponent can tie, undetermined)
        main_player: the player who is making the moves in the AdvisedMonteCarloPlayer's position when simulating games
        opponent: the player who is playing against the main_player when simulating games
        '''
        self.mc_simulation_amount = mc_simulation_amount
        self.mc_depth = mc_depth
        self.mc_rewards = mc_rewards
        self.pe_depth = pe_depth
        self.pe_rewards = pe_rewards
        self.sim_main_player = main_player
        self.sim_opponent = opponent

        # the position evaluation function
        self.pe_func = lambda game, player_number: position_eval(
            game, player_number, self.pe_depth - 1, self.pe_rewards)

        # the monte carlo evaluation function

        self.mc_func = lambda game, player_number: monte_carlo_eval(
            game,
            player_number,
            rewards=self.mc_rewards,
            simulation_amount=self.mc_simulation_amount,
            depth=self.mc_depth,
            main_player=self.sim_main_player,
            opponent=self.sim_opponent)
Example #15
    def test_get_move(self):
        board = TTTBoard(TestTTTBoard.blank_board)
        player1 = RandomPlayer(1, seed=42)
        player2 = RandomPlayer(1, seed=42)

        a = player1.get_move(board)
        b = player2.get_move(board)

        self.assertEqual(a.board, b.board)

        a = player2.get_move(board)
        b = player1.get_move(board)

        self.assertEqual(a.board, b.board)
Example #16
    def test_demo_players(self):
        test_layout = (""" ################
            #              #
            #              #
            #              #
            #   0      1   #
            #              #
            #              #
            #              #
            #.            .#
            ################ """)
        teams = [SimpleTeam(RandomPlayer()), SimpleTeam(RandomPlayer())]
        gm = GameMaster(test_layout, teams, 2, 5, seed=20)
        self.assertEqual(gm.universe.bots[0].current_pos, (4, 4))
        self.assertEqual(gm.universe.bots[1].current_pos, (4 + 7, 4))
        gm.play()

        pos_left_bot = gm.universe.bots[0].current_pos
        pos_right_bot = gm.universe.bots[1].current_pos

        # running again to test seed:
        teams = [SimpleTeam(RandomPlayer()), SimpleTeam(RandomPlayer())]
        gm = GameMaster(test_layout, teams, 2, 5, seed=20)
        gm.play()
        self.assertEqual(gm.universe.bots[0].current_pos, pos_left_bot)
        self.assertEqual(gm.universe.bots[1].current_pos, pos_right_bot)

        # running again with other seed:
        teams = [SimpleTeam(RandomPlayer()), SimpleTeam(RandomPlayer())]
        gm = GameMaster(test_layout, teams, 2, 5, seed=200)
        gm.play()
        # most probably, either the left bot or the right bot or both are at
        # a different position
        self.assertTrue(gm.universe.bots[0].current_pos != pos_left_bot
                        or gm.universe.bots[1].current_pos != pos_right_bot)
Example #17
def create_player(player_type):

    player = None
    if player_type == Players.Manual:
        name = input("What's your name ? \n")
        print("player added : {}".format(name))
        player = ManualPlayer(name)
    elif player_type == Players.Bot:
        player = RandomPlayer("RandomBot")
    else:
        raise MotorException("No other player type is available!")

    return player
Example #18
def test_epsilon():
    random = RandomPlayer(ConnectFour.moves)
    games = 10000
    colors = ['b', 'g', 'r', 'c', 'm']
    i = 0
    for e in numpy.arange(0.1, 1, 0.2):
        qlearn = QLearningPlayer(ConnectFour.moves, epsilon=e)
        results = playGames(qlearn, random, games)
        x = numpy.arange(games)
        y = numpy.cumsum(results == WinnerState.red) / (x + 1)
        pyplot.plot(x, y, label='epsilon = {}'.format(e), color=colors[i])
        i += 1

    pyplot.legend()
    pyplot.xlabel('Juegos')
    pyplot.ylabel('Proporción juegos ganados')
    pyplot.savefig('epsilon.png')
Example #19
    def get_move(self, board):
        # With probability epsilon, choose a move at random ("epsilon-greedy" exploration)
        if np.random.uniform() < self.epsilon:
            return RandomPlayer.get_move(board)
        else:
            state_key = QPlayer.make_and_maybe_add_key(board, self.mark,
                                                       self.Q)
            print(state_key)
            Qs = self.Q[state_key]
            print(Qs)

            # Compute the (stochastically tie-broken) move once, so the printed
            # move is the one that gets returned.
            if self.mark == "X":
                move = QPlayer.stochastic_argminmax(Qs, max)
                print(move)
                return move
            elif self.mark == "O":
                move = QPlayer.stochastic_argminmax(Qs, min)
                print(move)
                return move
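
The inline comment above names the selection rule: with probability epsilon act at random, otherwise act greedily on the learned Q-values. A small self-contained sketch of that rule (the Q-values and epsilon here are made up for illustration):

import numpy as np

def epsilon_greedy(q_values, epsilon, rng=np.random):
    # Explore with probability epsilon, otherwise exploit the highest-valued action.
    if rng.uniform() < epsilon:
        return rng.randint(len(q_values))
    return int(np.argmax(q_values))

# Three actions with illustrative values; epsilon=0.2 usually picks index 1.
print(epsilon_greedy(np.array([0.1, 0.7, 0.3]), epsilon=0.2))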
Example #20
    def configure_players(self, configs, src_file):
        plist = get_piece_list(src_file)
        players = []
        for i in range(len(configs)):
            if configs[i] == "R":
                players.append(RandomPlayer(copy.deepcopy(plist), i))
            elif configs[i] == "AB_0":
                players.append(AlphaBetaAI(copy.deepcopy(plist), i, 0))
            elif configs[i] == "AB_1":
                players.append(AlphaBetaAI(copy.deepcopy(plist), i, 1))
            elif configs[i] == "AB_2":
                players.append(AlphaBetaAI(copy.deepcopy(plist), i, 2))
            elif configs[i] == "AB_3":
                players.append(AlphaBetaAI(copy.deepcopy(plist), i, 3))
            else:
                print("Error: invalid input type " + configs[i])
                sys.exit(1)
        return players
Example #21
    def __init__(self, game_number):
        self.game_number = game_number

        logging.debug("Initializing map for game %d", game_number)
        self.map: Map = Map(Args.instance().args().n_turns)
        self.map.load_from_file(Args.instance().args().map)

        self.player: Dict[str, RandomPlayer] = dict()
        for player_name in self.map.players.keys():
            logging.debug("Initializing player %s for game %s as %s",
                          player_name, game_number,
                          Args.instance().args().player)
            if Args.instance().args().player == "random":
                self.player[player_name] = RandomPlayer(player_name)
            elif Args.instance().args().player == "human":
                self.player[player_name] = HumanPlayer(player_name)
            elif Args.instance().args().player == "qlearn":
                self.player[player_name] = QlearnPlayer(player_name)
Example #22
    def __init__(self, config):
        self.config = config

        session = tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(
            allow_growth=True)))
        self.random_player = RandomPlayer()
        self.exploratory_network = PolicyNetwork(config.exploratory_network)
        self.exploratory_player = PolicyPlayer(self.exploratory_network,
                                               session)

        self.playout_network = PolicyNetwork(
            config.playout_network,
            reuse=config.exploratory_network == config.playout_network)
        self.playout_player = PolicyPlayer(self.playout_network, session)

        self.run_dir = util.run_directory(config)
        util.restore_network_or_fail(session, self.run_dir,
                                     self.exploratory_network)
        util.restore_network_or_fail(session, self.run_dir,
                                     self.playout_network)
Example #23
    def generate_state(sl_player, rl_player):
        state = OthelloBoard(OthelloConstants.INITIAL_BOARD)
        color = OthelloConstants.BLACK

        random_player = RandomPlayer()
        U = random.randint(0, 59)

        state, color, game_over = OthelloSimulator.simulate_moves_with_player(
            state, color, sl_player, U)
        if game_over: raise ValueError('game terminated too early.')

        state, color, _ = OthelloSimulator.simulate_moves_with_player(
            state, color, random_player, 1)
        result_state, result_color = state.get_feature_matrix(color), color

        state, _, _ = OthelloSimulator.simulate_moves_with_player(
            state, color, rl_player, 60)

        scores = state.compute_scores()
        value = np.sign(scores[result_color] - scores[-result_color])
        return result_state, value
Example #24
def ai_versus_random(depth: int = 6, num_games: int = 10,
                     visualize: bool = True, show_stats: bool = True) -> None:
    """A function that runs num_games games between AIPlayerComplex and RandomPlayer and
    visualizes the game tree generated by those games.

    The visualized game tree will represent all the move sequences played in the games. Each
    node represents a move and the number labelled on the node is the column in which the player
    selected to make a move. The colour of the node represents the player that made the move.
    The last node on each branch indicates the winner of the game. The colour on the last node is
    the player that won, and a green last node indicates a tie.

    num_games is the number of games the players will play. Considering that the AI takes some
    time (varying amount depending on the depth) to make a move, running too many games will take
    a long time. The function defaults to 10 games, and that is recommended. The result of
    running 100 games with default settings is included in the written report.

    depth is the number of moves the AI will look ahead, just like the function play_with_ai.
    Having a high depth does make the AI smarter, but high depth against a RandomPlayer is
    unnecessary. The function defaults to 6, and that is able to win a vast majority of games
    against the RandomPlayer, if not all of them.

    visualize enables each game to be visualized. For a better viewing experience, each game has
    delays on each turn and on the winning page. The default is True; setting it to False
    shortens the run somewhat, but past a certain number of games the AI's move time dominates
    either way, so keeping it True is recommended.

    show_stats, when set as True, will show the number of games played, number of red and yellow
    wins, number of ties, and the win rate of each player on the Python console. It is by default
    True and recommended to be True.

    If visualize is True and the game window is forcefully closed using the 'x' button while the
    games are running, the function will stop and only return an instance of the game tree up to
    the point of the force quit. No visualization of the game tree nor stats will be provided.
    """
    red = AIPlayerComplex(depth=depth)
    yellow = RandomPlayer()

    run_games(red, yellow, num_games, visualize, show_stats)
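
An illustrative call; the argument values are arbitrary choices within the ranges the docstring discusses:

# Five quick games with a shallower search and no per-turn visualization.
ai_versus_random(depth=4, num_games=5, visualize=False, show_stats=True)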
Example #25
def saved_ai(n: int, input_file: str = 'data/saved_trees/full_tree.csv'):
    d_tree = build_from_file(input_file)
    results = []
    for _ in range(0, n):
        ai = AIPlayerBasic(d_tree, 0.95)
        random_player = RandomPlayer()

        moves_played, ai_win = run_game(ai, random_player)

        results.append(ai_win)

    total_win_percent = len([1 for result in results if result]) / len(results)

    recent_wins = 0
    if len(results) > 100:
        flipped_results = results[::-1]
        for i in range(0, 100):
            if flipped_results[i]:
                recent_wins += 1

    recent_win_percent = recent_wins / 100

    print('Recent Win Percentage:', recent_win_percent)
    print('Total Win Percentage:', total_win_percent)
Example #26
            logs.append(Action(action))
            logs.append(state)

        if done: break

        if show_viz:
            env.render()
            time.sleep(.1)

    return total_reward, logs


env = gym.make("CartPole-v0")

train_logs = []
random_player = RandomPlayer()

total_reward = 0.0
for ep in range(Constants.training_episodes):
    reward, event_log = play_episode(env, random_player, log_events=True)
    train_logs.extend(event_log)
    total_reward += reward

logging.info("Mean reward with random player: {}".format(
    total_reward / Constants.training_episodes))

training_data, training_labels = convert_event_sequence_to_training_set(
    train_logs)
logging.info("Training set size: {}".format(len(training_data)))
logging.info("Training labels distribution: {}".format(
    Counter(training_labels)))
Example #27
from game import Game
from sizeable_connect_x import SizeableConnectX


class ConnectFour(SizeableConnectX):
    def __init__(self):
        '''Initialize a connect4 game with 6 rows and 7 columns'''
        super().__init__(6, 7, 4)


if __name__ == "__main__":
    from players import RandomPlayer
    print("Getting random game!")
    c = RandomPlayer.get_random_game(ConnectFour)
    print(c)
    print(c.who_won())
    c.swap_players()
    print(c)

    # haha this takes forever. 64! is under a googol, but not by too much, so that's expected
    print(ConnectFour().get_complexity(-1))
Example #28
import numpy as np
from game import Game
from players import QPlayer
from players import SarsaPlayer
from players import RandomPlayer
import matplotlib.pyplot as plt

player = QPlayer(0.79, 0.05)
player1 = QPlayer(0.79, 0.05)
old_score = 0
rp = RandomPlayer()
no_of_games = 100
epochs = 2000
player_wins = []
#player_wins1 = []
for e in range(epochs):
    print("Epoch: %d"%e)
    player.wins = 0
    #player1.wins = 0
    player.explore_rate = np.exp(-0.017*e) / 0.11 + 0.1
    g = Game()
    g.new_game(player, player1)
    g.game_play()
    result = g.getScore()
    res = list(result.items())
    res.sort()
    x = res[0][1]
    player_score = x / 32 - 1
    if e > 0:
        player1.weight_update(old_score)
    player.weight_update(player_score)
Example #29
agent_type = 'DeepQLearningAgent'

# setup the game and players
p1 = DeepQLearningAgent(board_size=board_size,
                        buffer_size=buffer_size,
                        gamma=gamma,
                        n_actions=n_actions,
                        use_target_net=use_target_net,
                        epsilon=epsilon,
                        version=version,
                        name='dqn1')
p2 = DeepQLearningAgent(board_size=board_size, buffer_size=buffer_size,
                        gamma=gamma, n_actions=n_actions,
                        use_target_net=use_target_net, epsilon=epsilon,
                        version=version, name='dqn2')
p_random = RandomPlayer(board_size=board_size)
g = Game(player1=p1, player2=p2, board_size=board_size)
g2 = Game(player1=p1, player2=p_random, board_size=board_size)

# check the model architecture
print("Model architecture")
p1._model.summary()

# initializing parameters for DQN
reward_type = 'current'
sample_actions = False
decay = 0.85
epsilon_end = 0.1
n_games_buffer = 300
n_games_train = 10
episodes = 1 * (10**5)
Example #30
    batch_iterations=BATCH_ITERATIONS,
    experience_batch_size=EXPERIENCE_BATCH_SIZE,
    experience_buffer_size=EXPERIENCE_BUFFER_SIZE,
)
train_player = PolicyGradientPlayer(
    robot_brain,
    discount_factor=DISCOUNT_FACTOR,
    reward_factor=REWARD_FACTOR,
    batch_iterations=1,
    experience_batch_size=EXPERIENCE_BATCH_SIZE,
    experience_buffer_size=EXPERIENCE_BUFFER_SIZE,
)

human_game = TicTacToe((human, robot))
training = TicTacToe((robot, train_player))
random_training = TicTacToe((robot, RandomPlayer()))

robot.act_greedy = True
robot.show_action_probabilities = True

playing = True
while playing:

    # Gain experience, no learning to keep it fast
    robot.learn_while_playing = False
    random_training.play(32)
    training.play(32)

    # Learn on every move of the human game
    robot.learn_while_playing = True
    playing = human_game.play(2, render=True, pause=0.5)