def test_simple_game(self):
    layout = """
        ##########
        #        #
        #0  ..  1#
        ##########
        """
    server = SimpleServer(layout_string=layout, rounds=5, players=2,
                          bind_addrs=("ipc:///tmp/pelita-testplayer1-%s" % uuid.uuid4(),
                                      "ipc:///tmp/pelita-testplayer2-%s" % uuid.uuid4()))

    for bind_address in server.bind_addresses:
        self.assertTrue(bind_address.startswith("ipc://"))

    client1_address = server.bind_addresses[0]
    client2_address = server.bind_addresses[1]

    client1 = SimpleClient(SimpleTeam("team1", RandomPlayer()), address=client1_address)
    client2 = SimpleClient(SimpleTeam("team2", RandomPlayer()), address=client2_address)

    client1.autoplay_process()
    client2.autoplay_process()

    server.run()
    server.shutdown()
def monte_carlo_eval(original_game, player_number, main_player=RandomPlayer(),
                     rewards=(1, -1, .5), simulation_amount=100, depth=0,
                     opponent=RandomPlayer()):
    '''Returns a MonteCarloEvaluation object

    Plays random games from the given game position and averages the results.

    original_game is the game to simulate from
    player_number is the number of the player whose perspective we're evaluating the game from
    main_player is a Player object that we use to simulate the moves of the player with player_number
    rewards is a tuple (or list): (points for winning, points for losing, points for tying)
    simulation_amount is how many games to simulate per possible game at the specified depth
    depth is how many layers down you want it to start simulating
    opponent is the Player object we use to simulate the games of the player with the number that's not player_number
    '''
    if depth == 0:
        value = 0
        for _ in range(simulation_amount):
            game = original_game.get_copy()
            # set up the players
            players = [None, None]
            players[player_number] = main_player
            players[original_game.get_other_player(player_number)] = opponent
            # play out a game
            while game.who_won() is None:
                players[game.active_player].make_move(game)
            # update the value
            value += simple_eval(game, player_number, rewards).value
        # average the games' scores
        return MonteCarloEvaluation(value / simulation_amount, simulation_amount)
    else:
        winner = original_game.who_won()
        if winner is None:
            # list of MonteCarloEvaluation objects for each game
            lower_level = [
                monte_carlo_eval(game, player_number, main_player, rewards,
                                 simulation_amount, depth - 1, opponent)
                for game in original_game.get_next_level()
            ]
            # average the values across the same level
            return MonteCarloEvaluation(
                sum([e.value for e in lower_level]) / len(lower_level),
                sum([e.simulations for e in lower_level]))
        else:
            # game is finished, so use winner_eval
            return MonteCarloEvaluation(
                winner_eval(winner, player_number, rewards).value,
                simulation_amount)
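A minimal usage sketch for monte_carlo_eval, assuming a two-player game object exposing the get_copy / who_won / active_player / get_next_level interface used above (the ConnectFour class from a later snippet is one such candidate); the call itself is illustrative and not part of the original code:

# Hypothetical usage: average 50 random playouts per child position,
# expanding one ply below the current position before simulating.
game = ConnectFour()                      # assumed Game subclass (see ConnectFour snippet below)
evaluation = monte_carlo_eval(game,
                              player_number=0,
                              rewards=(1, -1, .5),
                              simulation_amount=50,
                              depth=1)
print(evaluation.value, evaluation.simulations)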
def test_random_seeds(self):
    test_layout = ("""
        ################
        #              #
        #              #
        #              #
        #   0      1   #
        #   2      3   #
        #              #
        #              #
        #.            .#
        ################
    """)
    players_a = [RandomPlayer() for _ in range(4)]
    team_1 = [
        SimpleTeam(players_a[0], players_a[2]),
        SimpleTeam(players_a[1], players_a[3])
    ]
    gm1 = GameMaster(test_layout, team_1, 4, 5, seed=20)
    gm1.set_initial()
    random_numbers_a = [player.rnd.randint(0, 10000) for player in players_a]
    # check that each player has a different seed (if randomness allows)
    self.assertEqual(len(set(random_numbers_a)), 4,
                     "Probably not all player seeds were unique.")

    players_b = [RandomPlayer() for _ in range(4)]
    team_2 = [
        SimpleTeam(players_b[0], players_b[2]),
        SimpleTeam(players_b[1], players_b[3])
    ]
    gm2 = GameMaster(test_layout, team_2, 4, 5, seed=20)
    gm2.set_initial()
    random_numbers_b = [player.rnd.randint(0, 10000) for player in players_b]
    self.assertEqual(random_numbers_a, random_numbers_b)

    players_c = [RandomPlayer() for _ in range(4)]
    team_3 = [
        SimpleTeam(players_c[0], players_c[2]),
        SimpleTeam(players_c[1], players_c[3])
    ]
    gm3 = GameMaster(test_layout, team_3, 4, 5, seed=200)
    gm3.set_initial()
    random_numbers_c = [player.rnd.randint(0, 10000) for player in players_c]
    self.assertNotEqual(random_numbers_a, random_numbers_c)
def run():
    print("WELCOME TO TIC-TAC-TOE!")
    print("X------------------------------------------------------------------------------------------------X")
    print("You can choose player 1 or player 2. They can be Human(H), Random Computer Moves (R) or AI (AI). "
          "Player 1 uses the X token and player 2 uses O. Both players are Human by default.")
    print("The Human Player is controlled by you. The AI is unbeatable and chooses its own moves. "
          "Random chooses any move on the board at random.")
    print("X------------------------------------------------------------------------------------------------X")

    p1 = input("Choose Player 1 (H/AI/R): ").strip().lower()
    p2 = input("Choose Player 2 (H/AI/R): ").strip().lower()

    if p1 == "ai":
        x_player = AIPlayer("X")
    elif p1 == "h":
        x_player = HumanPlayer("X")
    elif p1 == "r":
        x_player = RandomPlayer("X")
    else:
        x_player = HumanPlayer("X")

    if p2 == "ai":
        o_player = AIPlayer("O")
    elif p2 == "h":
        o_player = HumanPlayer("O")
    elif p2 == "r":
        o_player = RandomPlayer("O")
    else:
        o_player = HumanPlayer("O")

    print("X------------------------------------------------------------------------------------------------X\n")
    print(f"Player 1 (X) is {p1}")
    print(f"Player 2 (O) is {p2}")

    ctd = 5
    while ctd:
        try:
            sys.stdout.write(f"\rStarting in {ctd}...")
            sys.stdout.flush()
            time.sleep(1)
            ctd -= 1
        except KeyboardInterrupt:
            break
    print("\n")

    game = TicTacToe()
    os.system("clear")
    play(game, x_player, o_player, print_game=True)
def train(learning_curve: list[float], output_file: str = 'data/saved_trees/AIBasic.csv') -> None:
    results = []
    d_tree = DecisionTree(move=-1, turn='yellow', subtrees=[])
    random_player = RandomPlayer()
    for t in learning_curve:
        ai = AIPlayerBasic(d_tree, t)
        moves_played, ai_win = run_game(ai, random_player)
        if ai_win:
            moves_played.append(1)
        else:
            moves_played.append(0)
        d_tree.add_game(moves_played)
        results.append(ai_win)
    write_to_file(d_tree, output_file)

    total_win_percent = len([1 for result in results if result]) / len(results)
    recent_wins = 0
    if len(results) > 100:
        flipped_results = results[::-1]
        for i in range(0, 100):
            if flipped_results[i]:
                recent_wins += 1
        recent_win_percent = recent_wins / 100
        print('Recent Win Percentage:', recent_win_percent)
    print('Total Win Percentage:', total_win_percent)
def test_time_spent(self):
    outer = self

    class TimeSpendingPlayer(AbstractPlayer):
        def get_move(self):
            time_spent_begin = self.time_spent()
            sleep_time = 0.1
            time.sleep(sleep_time)
            time_spent_end = self.time_spent()

            outer.assertTrue(0 <= time_spent_begin < time_spent_end)

            time_diff = abs(time_spent_begin + sleep_time - time_spent_end)
            delta = 0.05
            outer.assertTrue(time_diff < delta)
            return stop

    test_layout = ("""
        ############
        #0 #.  .# 1#
        ############
    """)
    team = [SimpleTeam(TimeSpendingPlayer()), SimpleTeam(RandomPlayer())]
    gm = GameMaster(test_layout, team, 2, 1)
    gm.play()
def build_players() -> list:
    players = [
        ImpulsivePlayer(),
        DemandingPlayer(),
        CautiousPlayer(),
        RandomPlayer()
    ]
    return players
def __init__(self, pe_depth, mc_simulation_amount, mc_initial_depth, mc_play_depth=-1,
             mc_evaluator=WinnerRewardEvaluator((1, -1, .5)),
             pe_rewards=(2, -2, .9, 0, 0),
             main_player=RandomPlayer(), opponent=RandomPlayer()):
    '''
    mc is short for "MonteCarlo"
    pe is short for "Position Evaluator"

    pe_depth: how many moves the position evaluator should look ahead (should be at least 1)
    mc_simulation_amount: how many times the monte carlo evaluator should simulate each end position
    mc_initial_depth: how many moves the MonteCarlo simulation should look ahead (should be at least 1)
    mc_play_depth: how many moves the MonteCarlo simulation should simulate before evaluating (after looking ahead)
        (should be -1 to play out the entire game or at least 1 to evaluate unfinished games)
    mc_evaluator: evaluator that scores game positions
    pe_rewards: the position evaluator rewards for (win, loss, you can tie, opponent can tie, undetermined)
    main_player: the player who is making the moves in the AdvisedMonteCarloPlayer's position when simulating games
    opponent: the player who is playing against the main_player when simulating games
    '''
    self.pe_depth = pe_depth
    self.mc_simulation_amount = mc_simulation_amount
    self.mc_initial_depth = mc_initial_depth
    self.mc_play_depth = mc_play_depth
    self.mc_evaluator = mc_evaluator
    self.pe_rewards = pe_rewards
    self.sim_main_player = main_player
    self.sim_opponent = opponent
    # the position evaluation function
    self.pe_func = lambda game, player_number: position_eval(
        game, player_number, self.pe_depth - 1, self.pe_rewards)
    # the monte carlo evaluation function
    self.mc_func = lambda game, player_number: monte_carlo_eval(
        game, player_number, self.mc_evaluator,
        simulation_amount=self.mc_simulation_amount,
        initial_depth=self.mc_initial_depth,
        play_depth=self.mc_play_depth,
        main_player=self.sim_main_player,
        opponent=self.sim_opponent)
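An illustrative construction of this player, offered as a hedged sketch: the class name AdvisedMonteCarloPlayer is taken from the docstring above, and the argument values are arbitrary examples rather than recommended settings:

# Hypothetical instantiation: 2-ply position evaluation, 25 playouts per leaf,
# Monte Carlo search looks 1 ply ahead and plays games out to completion (-1).
player = AdvisedMonteCarloPlayer(pe_depth=2,
                                 mc_simulation_amount=25,
                                 mc_initial_depth=1,
                                 mc_play_depth=-1)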
def unsure_monte_carlo_eval(game, player_number, unsure_rewards=[1, -1, .5],
                            sure_rewards=[2, -2, 1.5], main_player=RandomPlayer(),
                            simulation_amount=5, depth=0, opponent=RandomPlayer()):
    '''Evaluates a game from player 0's perspective'''
    winner = game.who_won()
    if winner is None:
        # game is not complete
        return monte_carlo_eval(game, player_number, main_player, unsure_rewards,
                                simulation_amount, depth, opponent)
    else:
        # game is complete
        return Evaluation(sure_rewards[winner])
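A hedged usage sketch: score an in-progress position with the default shallow simulation, falling back to the fixed sure_rewards lookup once game.who_won() reports a result (the game object is assumed to provide who_won() as above):

# Hypothetical call: five playouts directly from the current position (depth=0),
# scored with the default unsure_rewards of (1, -1, .5).
evaluation = unsure_monte_carlo_eval(game, player_number=0)
print(evaluation.value)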
def test_demo_players(self):
    test_layout = ("""
        ############
        #0 #.  .# 1#
        ############
    """)
    team = [SimpleTeam(SpeakingPlayer()), SimpleTeam(RandomPlayer())]
    gm = GameMaster(test_layout, team, 2, 1)
    gm.play()
    self.assertTrue(gm.game_state["bot_talk"][0].startswith("Going"))
    self.assertEqual(gm.game_state["bot_talk"][1], "")
def run_randoms(n: int, output_file: str = 'data/saved_trees/full_tree.csv') -> None:
    start_time = time.time()
    red = RandomPlayer()
    yellow = RandomPlayer()
    d_tree = DecisionTree(move=-1, subtrees=[], turn='yellow')
    for _ in range(0, n):
        results, red_win = run_game(red, yellow)
        if red_win:
            results.append(1)
        else:
            results.append(0)
        d_tree.add_game(results)
    write_to_file(d_tree, output_file)
    # print(d_tree)
    print('Final Run Time:', time.time() - start_time)
def test_can_create_player():
    players = ["red", "black", "green", "yellow", "blue", "grey"]
    game = Game(players)
    player = RandomPlayer(players[0], game)
    player = NonPlanningProgressMaximizer(players[1], game)
    player = PlanningProgressMaximizer(players[2], game, {
        'max_depth': 5,
    })
    player = SingleMoveProgressMaximizer(players[3], game)
    player = RandomSingleMovePlayer(players[3], game)
def get_move(self, board):
    moves = board.available_moves()
    if moves:
        for move in moves:
            # take an immediate win, or block the opponent's immediate win
            if THandPlayer.next_move_winner(board, move, self.mark):
                return move
            elif THandPlayer.next_move_winner(board, move, self.opponent_mark):
                return move
        else:
            # for/else: no winning or blocking move was found, so fall back to a random move
            return RandomPlayer.get_move(board)
def __init__(self, mc_simulation_amount, mc_depth, pe_depth,
             mc_rewards=(1, -1, .5), pe_rewards=(2, -2, .9, 0, 0),
             main_player=RandomPlayer(), opponent=RandomPlayer()):
    '''
    mc is short for "MonteCarlo"

    mc_simulation_amount: how many times the monte carlo evaluator should simulate each end position
    mc_depth: how many moves the MonteCarlo simulation should look ahead (should be at least 1)
    pe_depth: how many moves the position evaluator should look ahead (should be at least 1)
    mc_rewards: the monte carlo evaluation rewards for (win, loss, tie)
    pe_rewards: the position evaluator rewards for (win, loss, tie, undetermined)
    main_player: the player who is making the moves in the AdvisedMonteCarloPlayer's position when simulating games
    opponent: the player who is playing against the main_player when simulating games
    '''
    self.mc_simulation_amount = mc_simulation_amount
    self.mc_depth = mc_depth
    self.mc_rewards = mc_rewards
    self.pe_depth = pe_depth
    self.pe_rewards = pe_rewards
    self.sim_main_player = main_player
    self.sim_opponent = opponent
    # the position evaluation function
    self.pe_func = lambda game, player_number: position_eval(
        game, player_number, self.pe_depth - 1, self.pe_rewards)
    # the monte carlo evaluation function
    self.mc_func = lambda game, player_number: monte_carlo_eval(
        game, player_number,
        rewards=self.mc_rewards,
        simulation_amount=self.mc_simulation_amount,
        depth=self.mc_depth,
        main_player=self.sim_main_player,
        opponent=self.sim_opponent)
def test_get_move(self):
    board = TTTBoard(TestTTTBoard.blank_board)
    player1 = RandomPlayer(1, seed=42)
    player2 = RandomPlayer(1, seed=42)
    a = player1.get_move(board)
    b = player2.get_move(board)
    self.assertEqual(a.board, b.board)
    a = player2.get_move(board)
    b = player1.get_move(board)
    self.assertEqual(a.board, b.board)
def test_demo_players(self):
    test_layout = ("""
        ################
        #              #
        #              #
        #              #
        #   0      1   #
        #              #
        #              #
        #              #
        #.            .#
        ################
    """)
    teams = [SimpleTeam(RandomPlayer()), SimpleTeam(RandomPlayer())]
    gm = GameMaster(test_layout, teams, 2, 5, seed=20)
    self.assertEqual(gm.universe.bots[0].current_pos, (4, 4))
    self.assertEqual(gm.universe.bots[1].current_pos, (4 + 7, 4))
    gm.play()
    pos_left_bot = gm.universe.bots[0].current_pos
    pos_right_bot = gm.universe.bots[1].current_pos

    # running again to test seed:
    teams = [SimpleTeam(RandomPlayer()), SimpleTeam(RandomPlayer())]
    gm = GameMaster(test_layout, teams, 2, 5, seed=20)
    gm.play()
    self.assertEqual(gm.universe.bots[0].current_pos, pos_left_bot)
    self.assertEqual(gm.universe.bots[1].current_pos, pos_right_bot)

    # running again with other seed:
    teams = [SimpleTeam(RandomPlayer()), SimpleTeam(RandomPlayer())]
    gm = GameMaster(test_layout, teams, 2, 5, seed=200)
    gm.play()
    # most probably, either the left bot or the right bot or both are at
    # a different position
    self.assertTrue(gm.universe.bots[0].current_pos != pos_left_bot or
                    gm.universe.bots[1].current_pos != pos_right_bot)
def create_player(player_type):
    player = None
    if player_type == Players.Manual:
        name = input("What's your name ? \n")
        print("player added : {}".format(name))
        player = ManualPlayer(name)
    elif player_type == Players.Bot:
        player = RandomPlayer("RandomBot")
    else:
        raise MotorException("No other player type is available!")
    return player
def test_epsilon():
    random = RandomPlayer(ConnectFour.moves)
    games = 10000
    colors = ['b', 'g', 'r', 'c', 'm']
    i = 0
    for e in numpy.arange(0.1, 1, 0.2):
        qlearn = QLearningPlayer(ConnectFour.moves, epsilon=e)
        results = playGames(qlearn, random, games)
        x = numpy.arange(games)
        y = numpy.cumsum(results == WinnerState.red) / (x + 1)
        pyplot.plot(x, y, label='epsilon = {}'.format(e), color=colors[i])
        i += 1
    pyplot.legend()
    pyplot.xlabel('Games')
    pyplot.ylabel('Proportion of games won')
    pyplot.savefig('epsilon.png')
def get_move(self, board):
    # With probability epsilon, choose a move at random ("epsilon-greedy" exploration)
    if np.random.uniform() < self.epsilon:
        return RandomPlayer.get_move(board)
    else:
        state_key = QPlayer.make_and_maybe_add_key(board, self.mark, self.Q)
        print(state_key)
        Qs = self.Q[state_key]
        print(Qs)
        # X maximizes the Q-values, O minimizes them; compute the move once so
        # the printed move matches the one returned
        if self.mark == "X":
            move = QPlayer.stochastic_argminmax(Qs, max)
        elif self.mark == "O":
            move = QPlayer.stochastic_argminmax(Qs, min)
        print(move)
        return move
def configure_players(self, configs, src_file):
    plist = get_piece_list(src_file)
    players = []
    for i in range(len(configs)):
        if configs[i] == "R":
            players.append(RandomPlayer(copy.deepcopy(plist), i))
        elif configs[i] == "AB_0":
            players.append(AlphaBetaAI(copy.deepcopy(plist), i, 0))
        elif configs[i] == "AB_1":
            players.append(AlphaBetaAI(copy.deepcopy(plist), i, 1))
        elif configs[i] == "AB_2":
            players.append(AlphaBetaAI(copy.deepcopy(plist), i, 2))
        elif configs[i] == "AB_3":
            players.append(AlphaBetaAI(copy.deepcopy(plist), i, 3))
        else:
            print("Error: invalid input type " + configs[i])
            sys.exit(1)
    return players
def __init__(self, game_number):
    self.game_number = game_number
    logging.debug("Initializing map for game %d", game_number)
    self.map: Map = Map(Args.instance().args().n_turns)
    self.map.load_from_file(Args.instance().args().map)
    self.player: Dict[str, RandomPlayer] = dict()
    for player_name in self.map.players.keys():
        logging.debug("Initializing player %s for game %s as %s",
                      player_name, game_number, Args.instance().args().player)
        if Args.instance().args().player == "random":
            self.player[player_name] = RandomPlayer(player_name)
        elif Args.instance().args().player == "human":
            self.player[player_name] = HumanPlayer(player_name)
        elif Args.instance().args().player == "qlearn":
            self.player[player_name] = QlearnPlayer(player_name)
def __init__(self, config):
    self.config = config
    session = tf.Session(config=tf.ConfigProto(
        gpu_options=tf.GPUOptions(allow_growth=True)))

    self.random_player = RandomPlayer()

    self.exploratory_network = PolicyNetwork(config.exploratory_network)
    self.exploratory_player = PolicyPlayer(self.exploratory_network, session)

    self.playout_network = PolicyNetwork(
        config.playout_network,
        reuse=config.exploratory_network == config.playout_network)
    self.playout_player = PolicyPlayer(self.playout_network, session)

    self.run_dir = util.run_directory(config)
    util.restore_network_or_fail(session, self.run_dir, self.exploratory_network)
    util.restore_network_or_fail(session, self.run_dir, self.playout_network)
def generate_state(sl_player, rl_player):
    state = OthelloBoard(OthelloConstants.INITIAL_BOARD)
    color = OthelloConstants.BLACK
    random_player = RandomPlayer()
    U = random.randint(0, 59)
    state, color, game_over = OthelloSimulator.simulate_moves_with_player(
        state, color, sl_player, U)
    if game_over:
        raise ValueError('game terminated too early.')
    state, color, _ = OthelloSimulator.simulate_moves_with_player(
        state, color, random_player, 1)
    result_state, result_color = state.get_feature_matrix(color), color
    state, _, _ = OthelloSimulator.simulate_moves_with_player(
        state, color, rl_player, 60)
    scores = state.compute_scores()
    value = np.sign(scores[result_color] - scores[-result_color])
    return result_state, value
def ai_versus_random(depth: int = 6, num_games: int = 10, visualize: bool = True,
                     show_stats: bool = True) -> None:
    """A function that runs num_games games between AIPlayerComplex and RandomPlayer
    and visualizes the game tree generated by those games.

    The visualized game tree will represent all the move sequences played in the games.
    Each node represents a move, and the number labelled on the node is the column in
    which the player selected to make a move. The colour of the node represents the
    player that made the move. The last node on each branch indicates the winner of the
    game: its colour is the player that won, and a green last node indicates a tie.

    num_games is the number of games the players will play. Considering that the AI
    takes some time (varying with the depth) to make a move, running too many games
    will take a long time. The function defaults to 10 games, and that is recommended.
    The result of running 100 games with default settings is included in the written
    report.

    depth is the number of moves the AI will look ahead, just like in the function
    play_with_ai. A higher depth does make the AI smarter, but a high depth against a
    RandomPlayer is unnecessary. The function defaults to 6, which wins a vast majority
    of games against the RandomPlayer, if not all of them.

    visualize enables each game to be visualized. For a better viewing experience, each
    game has delays on each turn and on the winning page. The default is True, but
    setting it to False will decrease the time needed to run the function. Regardless,
    the function takes too long over a certain number of games, so it is recommended to
    keep this as True.

    show_stats, when set to True, will show the number of games played, the number of
    red and yellow wins, the number of ties, and the win rate of each player on the
    Python console. It is True by default and recommended to be True.

    If visualize is True and the game window is forcefully closed using the 'x' button
    while the games are running, the function will stop and only return an instance of
    the game tree up to the point of the force quit. No visualization of the game tree
    nor stats will be provided.
    """
    red = AIPlayerComplex(depth=depth)
    yellow = RandomPlayer()
    run_games(red, yellow, num_games, visualize, show_stats)
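A usage sketch exercising the defaults described in the docstring above; nothing beyond the function's own signature is assumed:

# Ten visualized games between the depth-6 AIPlayerComplex and a RandomPlayer,
# with win statistics printed to the console afterwards.
ai_versus_random(depth=6, num_games=10, visualize=True, show_stats=True)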
def saved_ai(n: int, input_file: str = 'data/saved_trees/full_tree.csv'):
    d_tree = build_from_file(input_file)
    results = []
    for _ in range(0, n):
        ai = AIPlayerBasic(d_tree, 0.95)
        random_player = RandomPlayer()
        moves_played, ai_win = run_game(ai, random_player)
        results.append(ai_win)

    total_win_percent = len([1 for result in results if result]) / len(results)
    recent_wins = 0
    if len(results) > 100:
        flipped_results = results[::-1]
        for i in range(0, 100):
            if flipped_results[i]:
                recent_wins += 1
        recent_win_percent = recent_wins / 100
        print('Recent Win Percentage:', recent_win_percent)
    print('Total Win Percentage:', total_win_percent)
        logs.append(Action(action))
        logs.append(state)
        if done:
            break
        if show_viz:
            env.render()
            time.sleep(.1)
    return total_reward, logs


env = gym.make("CartPole-v0")

train_logs = []
random_player = RandomPlayer()
total_reward = 0.0
for ep in range(Constants.training_episodes):
    reward, event_log = play_episode(env, random_player, log_events=True)
    train_logs.extend(event_log)
    total_reward += reward

logging.info("Mean reward with random player: {}".format(
    total_reward / Constants.training_episodes))

training_data, training_labels = convert_event_sequence_to_training_set(train_logs)
logging.info("Training set size: {}".format(len(training_data)))
logging.info("Training labels distribution: {}".format(Counter(training_labels)))
from game import Game
from sizeable_connect_x import SizeableConnectX


class ConnectFour(SizeableConnectX):
    def __init__(self):
        '''Initialize a connect4 game with 6 rows and 7 columns'''
        super().__init__(6, 7, 4)


if __name__ == "__main__":
    from players import RandomPlayer

    print("Getting random game!")
    c = RandomPlayer.get_random_game(ConnectFour)
    print(c)
    print(c.who_won())
    c.swap_players()
    print(c)
    # haha this takes forever. 64! is under a googol, but not by too much,
    # so that's expected
    print(ConnectFour().get_complexity(-1))
import numpy as np
from game import Game
from players import QPlayer
from players import SarsaPlayer
from players import RandomPlayer
import matplotlib.pyplot as plt

player = QPlayer(0.79, 0.05)
player1 = QPlayer(0.79, 0.05)
old_score = 0
rp = RandomPlayer()
no_of_games = 100
epochs = 2000
player_wins = []
# player_wins1 = []

for e in range(epochs):
    print("Epoch: %d" % e)
    player.wins = 0
    # player1.wins = 0
    player.explore_rate = np.exp(-0.017 * e) / 0.11 + 0.1
    g = Game()
    g.new_game(player, player1)
    g.game_play()
    result = g.getScore()
    res = list(result.items())
    res.sort()
    x = res[0][1]
    player_score = x / 32 - 1
    if e > 0:
        player1.weight_update(old_score)
    player.weight_update(player_score)
agent_type = 'DeepQLearningAgent'

# setup the game and players
p1 = DeepQLearningAgent(board_size=board_size, buffer_size=buffer_size, gamma=gamma,
                        n_actions=n_actions, use_target_net=use_target_net,
                        epsilon=epsilon, version=version, name='dqn1')
p2 = DeepQLearningAgent(board_size=board_size, buffer_size=buffer_size, gamma=gamma,
                        n_actions=n_actions, use_target_net=use_target_net,
                        epsilon=epsilon, version=version, name='dqn2')
p_random = RandomPlayer(board_size=board_size)
g = Game(player1=p1, player2=p2, board_size=board_size)
g2 = Game(player1=p1, player2=p_random, board_size=board_size)

# check the model architecture
print("Model architecture")
p1._model.summary()

# initializing parameters for DQN
reward_type = 'current'
sample_actions = False
decay = 0.85
epsilon_end = 0.1
n_games_buffer = 300
n_games_train = 10
episodes = 1 * (10**5)
    batch_iterations=BATCH_ITERATIONS,
    experience_batch_size=EXPERIENCE_BATCH_SIZE,
    experience_buffer_size=EXPERIENCE_BUFFER_SIZE,
)
train_player = PolicyGradientPlayer(
    robot_brain,
    discount_factor=DISCOUNT_FACTOR,
    reward_factor=REWARD_FACTOR,
    batch_iterations=1,
    experience_batch_size=EXPERIENCE_BATCH_SIZE,
    experience_buffer_size=EXPERIENCE_BUFFER_SIZE,
)

human_game = TicTacToe((human, robot))
training = TicTacToe((robot, train_player))
random_training = TicTacToe((robot, RandomPlayer()))

robot.act_greedy = True
robot.show_action_probabilities = True

playing = True
while playing:
    # Gain experience, no learning to keep it fast
    robot.learn_while_playing = False
    random_training.play(32)
    training.play(32)

    # Learn on every move of the human game
    robot.learn_while_playing = True
    playing = human_game.play(2, render=True, pause=0.5)