def play_games(num_games):
    """Run *num_games* self-play games between two minimax agents and
    export per-move feature data for every decided (non-draw) game.

    Each move's basis-vector features are recorded from both players'
    perspectives; draws produce no exported data.
    """
    player_1_agent = MinimaxAgent("MinimaxAgent1")
    player_2_agent = MinimaxAgent("MinimaxAgent2")
    next_player = {
        Player.PLAYER_1: Player.PLAYER_2,
        Player.PLAYER_2: Player.PLAYER_1
    }
    player_map = {
        Player.PLAYER_1: player_1_agent,
        Player.PLAYER_2: player_2_agent
    }
    # Feature columns come from the basis vector of an empty board, plus
    # two reward columns used as learning targets.
    data_columns = sorted(basis_vector(Connect4Board(), Player.PLAYER_1).keys())
    data_columns.append("reward")
    data_columns.append("only_end_reward")
    data = pd.DataFrame(columns=data_columns)
    # loop through games
    for i in range(num_games):
        print("Game %d!" % (i))
        game = Connect4Board()
        curr_player = Player.PLAYER_1
        player_1_episode = [basis_vector(game, Player.PLAYER_1)]
        player_2_episode = [basis_vector(game, Player.PLAYER_2)]
        winner = None
        while True:
            game = game.add_piece(
                curr_player,
                choose_action(player_map[curr_player], curr_player, game))
            player_1_episode.append(basis_vector(game, Player.PLAYER_1))
            player_2_episode.append(basis_vector(game, Player.PLAYER_2))
            game_state = game.check_game_state(curr_player)
            if game_state == GameState.DRAW:
                print("DRAW")
                break
            if game_state == GameState.PLAYER_1_WIN:
                winner = Player.PLAYER_1
                print("PLAYER 1 WON!")
                break
            if game_state == GameState.PLAYER_2_WIN:
                winner = Player.PLAYER_2
                print("PLAYER 2 WON!")
                break
            curr_player = next_player[curr_player]
        if winner is not None:
            episode_data = episode_to_data(player_1_episode, player_2_episode,
                                           winner, data_columns)
            print("Shape of episode data:", episode_data.shape)
            # BUG FIX: DataFrame.append was deprecated in pandas 1.4 and
            # removed in 2.0; pd.concat is the supported equivalent.
            data = pd.concat([data, episode_data])
    export_data(data)
def run_game(print_board=False):
    """Play one interactive game: a human (player 1) against an MCTS agent.

    Prints the final board and the outcome, then returns.
    """
    game = Connect4Board()
    other_player = {
        Player.PLAYER_1: Player.PLAYER_2,
        Player.PLAYER_2: Player.PLAYER_1,
    }
    # Opponents tried during development (swap in as needed):
    #   MinimaxAgent("The replacement human") for player 1;
    #   HumanAgent("Player 2"), MinimaxAgent("The AI"),
    #   QLearningAgent("The AI", use_offline_params=True),
    #   ForwardSearchAgent("The AI") for player 2.
    player_map = {
        Player.PLAYER_1: HumanAgent("The Human"),
        Player.PLAYER_2: MCTSAgent("The AI"),
    }
    curr_player = Player.PLAYER_1
    while True:
        if print_board:
            game.print_board()
            print("================================")
        move = player_map[curr_player].get_action(curr_player, game)
        game = game.add_piece(curr_player, move)
        state = game.check_game_state(curr_player)
        if state == GameState.DRAW:
            print("DRAW!!!")
            game.print_board()
            return
        if state == GameState.PLAYER_1_WIN:
            print("PLAYER 1 WINS!!!")
            game.print_board()
            return
        if state == GameState.PLAYER_2_WIN:
            print("PLAYER 2 WINS!!!")
            game.print_board()
            return
        curr_player = other_player[curr_player]
def test_win_condition():
    """Exhaustively test Connect4Board.check_win.

    Places every possible vertical, horizontal, and diagonal
    four-in-a-row for each player on an otherwise empty 6x7 board and
    verifies that check_win detects it.

    Returns:
        (True, None, None) on success, otherwise
        (False, <failure message>, <offending board>).
    """
    # check vertical
    for row in range(6 - 3):
        for col in range(7):
            game = Connect4Board()
            for i in range(row, row + 4):
                game.board[i][col] = Player.PLAYER_1
            if not game.check_win(Player.PLAYER_1):
                return False, "Failed vertical win condition for player 1", game
            game = Connect4Board()
            for i in range(row, row + 4):
                game.board[i][col] = Player.PLAYER_2
            if not game.check_win(Player.PLAYER_2):
                return False, "Failed vertical win condition for player 2", game
    # check horizontal
    for row in range(6):
        for col in range(7 - 3):
            game = Connect4Board()
            for i in range(col, col + 4):
                game.board[row][i] = Player.PLAYER_1
            if not game.check_win(Player.PLAYER_1):
                return False, "Failed horizontal win condition for player 1", game
            game = Connect4Board()
            for i in range(col, col + 4):
                game.board[row][i] = Player.PLAYER_2
            if not game.check_win(Player.PLAYER_2):
                return False, "Failed horizontal win condition for player 2", game
    # check diagonal (/)
    for row in range(6 - 3):
        for col in range(7 - 3):
            game = Connect4Board()
            for i in range(4):
                game.board[row + i][col + i] = Player.PLAYER_1
            if not game.check_win(Player.PLAYER_1):
                return False, "Failed diagonal / win condition for player 1", game
            game = Connect4Board()
            for i in range(4):
                game.board[row + i][col + i] = Player.PLAYER_2
            if not game.check_win(Player.PLAYER_2):
                return False, "Failed diagonal / win condition for player 2", game
    # check diagonal (\)
    for row in range(6 - 3):
        for col in range(3, 7):
            game = Connect4Board()
            for i in range(4):
                game.board[row + i][col - i] = Player.PLAYER_1
            if not game.check_win(Player.PLAYER_1):
                # BUG FIX: "\ " was an invalid escape sequence (SyntaxWarning
                # in modern CPython); "\\ " produces the identical runtime
                # string without the warning.
                return False, "Failed diagonal \\ win condition for player 1:", game
            game = Connect4Board()
            for i in range(4):
                game.board[row + i][col - i] = Player.PLAYER_2
            if not game.check_win(Player.PLAYER_2):
                return False, "Failed diagonal \\ win condition for player 2", game
    return True, None, None
def start_connect4(width, height, connect):
    """Build a Connect 4 board for the requester's lobby and redirect all
    lobby members to the game page.

    Args:
        width, height, connect: board dimensions and win length; arrive as
            strings from the route and are converted to ints here.
    """
    lobby = game_manager.get_game(request.remote_addr)
    dimensions = (int(width), int(height), int(connect))
    print('starting connect4', lobby.get_name())
    board = Connect4Board(lobby.get_players(), *dimensions)
    lobby.start_game(board)
    emit('goto_game', '/connect4', room=lobby.get_name())
def run_game(print_board=False):
    """Play one game: a human (player 1) against a minimax agent.

    Prints the final board and the outcome, then returns.
    """
    game = Connect4Board()
    opponent_of = {
        Player.PLAYER_1: Player.PLAYER_2,
        Player.PLAYER_2: Player.PLAYER_1,
    }
    agents = {
        Player.PLAYER_1: HumanAgent("Player 1"),
        # Swap in HumanAgent("Player 2") for a two-human game.
        Player.PLAYER_2: MinimaxAgent(Player.PLAYER_2),
    }
    # Terminal states mapped to their announcement messages.
    outcome_message = {
        GameState.DRAW: "DRAW!!!",
        GameState.PLAYER_1_WIN: "PLAYER 1 WINS!!!",
        GameState.PLAYER_2_WIN: "PLAYER 2 WINS!!!",
    }
    curr_player = Player.PLAYER_1
    while True:
        if print_board:
            game.print_board()
            print("================================")
        game = game.add_piece(curr_player,
                              agents[curr_player].get_action(game))
        state = game.check_game_state(curr_player)
        if state in outcome_message:
            print(outcome_message[state])
            game.print_board()
            return
        curr_player = opponent_of[curr_player]
def _run_series(player_1_agent, player_2_agent, num_games, scoreboard):
    """Play *num_games* games between the two agents (first argument moves
    first) and record win/loss/draw counts into *scoreboard*."""
    next_player = {Player.PLAYER_1: Player.PLAYER_2,
                   Player.PLAYER_2: Player.PLAYER_1}
    player_map = {Player.PLAYER_1: player_1_agent,
                  Player.PLAYER_2: player_2_agent}
    name_1 = player_1_agent.get_name()
    name_2 = player_2_agent.get_name()
    print("%s VS %s:" % (name_1, name_2))
    for i in range(num_games):
        print("Game %d" % (i + 1))
        game = Connect4Board()
        curr_player = Player.PLAYER_1
        winner = None
        while True:
            game = game.add_piece(
                curr_player,
                player_map[curr_player].get_action(curr_player, game))
            game_state = game.check_game_state(curr_player)
            if game_state == GameState.DRAW:
                scoreboard[name_1][name_2]["draw"] += 1
                scoreboard[name_2][name_1]["draw"] += 1
                break
            if game_state == GameState.PLAYER_1_WIN:
                scoreboard[name_1][name_2]["win"] += 1
                scoreboard[name_2][name_1]["loss"] += 1
                winner = name_1
                break
            if game_state == GameState.PLAYER_2_WIN:
                # BUG FIX: a player-2 win was previously credited to
                # player 1 (wrong scoreboard entries and wrong winner name).
                scoreboard[name_2][name_1]["win"] += 1
                scoreboard[name_1][name_2]["loss"] += 1
                winner = name_2
                break
            curr_player = next_player[curr_player]
        if winner is not None:
            print(winner, "won!")
        else:
            print("Draw!")


def play_games(num_games=6):
    """Round-robin tournament between all instantiated agents.

    Every pair of agents plays *num_games* games, half with each agent
    moving first, then the aggregated scoreboard is printed.

    Bug fixed: a PLAYER_2 win is now credited to the player-2 agent; it
    was previously recorded as a win for player 1.
    """
    agent_list = instantiate_agents()
    # set up counts for win/loss
    scoreboard = {
        agent.get_name(): {
            other.get_name(): {"win": 0, "loss": 0, "draw": 0}
            for other in agent_list
            if other.get_name() != agent.get_name()
        }
        for agent in agent_list
    }
    # loop through games
    for idx_1 in range(len(agent_list)):
        for idx_2 in range(idx_1 + 1, len(agent_list)):
            # Each pairing plays both seat assignments.
            _run_series(agent_list[idx_1], agent_list[idx_2],
                        num_games // 2, scoreboard)
            _run_series(agent_list[idx_2], agent_list[idx_1],
                        num_games // 2, scoreboard)
    # output counts
    print_scoreboard(scoreboard)
def training(num_iterations, discount_factor=0.8):
    """Train linear Q-function weights via epsilon-greedy self-play.

    Runs *num_iterations* games, alternating which seat the learning agent
    occupies, and updates the weight vector *theta* with a decaying
    learning rate alpha = 1 / N(s) where N counts state visits.

    Args:
        num_iterations: number of self-play games.
        discount_factor: TD discount (gamma).

    Returns:
        The learned theta dict (feature name -> weight).
    """
    next_player = {
        Player.PLAYER_1: Player.PLAYER_2,
        Player.PLAYER_2: Player.PLAYER_1
    }
    # Initialize theta: mild priors on threat features, large magnitudes
    # on the terminal win/loss features.
    theta = {
        "player_2_out_of_4": 0,
        "opponent_2_out_of_4": 0,
        "player_3_out_of_4": 0.01,
        "opponent_3_out_of_4": -0.01,
        "player_3_out_of_5": 0.05,
        "opponent_3_out_of_5": -0.05,
        "player_num_possible_wins_in_col_0": 0,
        "player_num_possible_wins_in_col_1": 0,
        "player_num_possible_wins_in_col_2": 0,
        "player_num_possible_wins_in_col_3": 0,
        "player_num_possible_wins_in_col_4": 0,
        "player_num_possible_wins_in_col_5": 0,
        "player_num_possible_wins_in_col_6": 0,
        "opponent_num_possible_wins_in_col_0": 0,
        "opponent_num_possible_wins_in_col_1": 0,
        "opponent_num_possible_wins_in_col_2": 0,
        "opponent_num_possible_wins_in_col_3": 0,
        "opponent_num_possible_wins_in_col_4": 0,
        "opponent_num_possible_wins_in_col_5": 0,
        "opponent_num_possible_wins_in_col_6": 0,
        "player_num_consecutive_possible_wins_in_col_0": 0.05,
        "player_num_consecutive_possible_wins_in_col_1": 0.05,
        "player_num_consecutive_possible_wins_in_col_2": 0.05,
        "player_num_consecutive_possible_wins_in_col_3": 0.05,
        "player_num_consecutive_possible_wins_in_col_4": 0.05,
        "player_num_consecutive_possible_wins_in_col_5": 0.05,
        "player_num_consecutive_possible_wins_in_col_6": 0.05,
        "opponent_num_consecutive_possible_wins_in_col_0": -0.05,
        "opponent_num_consecutive_possible_wins_in_col_1": -0.05,
        "opponent_num_consecutive_possible_wins_in_col_2": -0.05,
        "opponent_num_consecutive_possible_wins_in_col_3": -0.05,
        "opponent_num_consecutive_possible_wins_in_col_4": -0.05,
        "opponent_num_consecutive_possible_wins_in_col_5": -0.05,
        "opponent_num_consecutive_possible_wins_in_col_6": -0.05,
        "player_win": 10,
        "opponent_win": -10
    }
    # Visit counts keyed by serialized board; alpha = 1 / N(s).
    N = {}
    # loop through games
    for i in range(num_iterations):
        print("Iteration %d!" % (i + 1))
        game = Connect4Board()
        # Alternate which seat the learning agent occupies each iteration.
        agent_player = Player.PLAYER_1 if i % 2 == 0 else Player.PLAYER_2
        opp_player = Player.PLAYER_1 if i % 2 != 0 else Player.PLAYER_2
        curr_player = Player.PLAYER_1
        game_end = False
        winner = None
        while True:
            # Choose action based on theta^T * basis + some exploration
            action = epsilon_greedy(game, curr_player, theta)
            # Observe new next state and reward
            game = game.add_piece(curr_player, action)
            reward = 0
            if game.check_draw():
                reward = 0
                game_end = True
            elif game.check_win(agent_player):
                reward = 1
                game_end = True
                winner = agent_player
            elif game.check_win(opp_player):
                reward = -1
                game_end = True
                winner = opp_player
            # Find the action that maximizes q for the next player
            valid_next_actions = [
                a for a in range(game.NUM_COLS) if game.valid_action(a)
            ]
            next_q = float("-inf")
            next_action = None
            for a in valid_next_actions:
                temp_basis = basis_vector(
                    game.add_piece(next_player[curr_player], a),
                    next_player[curr_player])
                val = calculate_q(theta, temp_basis)
                if val > next_q:
                    next_q = val
                    next_action = a
            # Update theta with decaying learning rate alpha = 1 / N(s).
            board_string = game.serialize_board()
            N[board_string] = N.get(board_string, 0) + 1
            alpha = 1.0 / N[board_string]
            basis = basis_vector(game, agent_player)
            if valid_next_actions and not game_end:
                # Clip the TD error to [-1, 1] to prevent exploding
                # Q-values.  BUG FIX: the original max(1.0, min(-1.0, x))
                # always evaluates to 1.0 (min(-1, x) <= -1, then
                # max(1, .) == 1), discarding the TD error entirely; the
                # correct clamp is max(-1.0, min(1.0, x)).
                td_error = (reward
                            + discount_factor * calculate_q(
                                theta,
                                basis_vector(
                                    game.add_piece(next_player[curr_player],
                                                   next_action),
                                    agent_player))
                            - calculate_q(theta, basis))
                coefficient = alpha * max(-1.0, min(1.0, td_error))
            else:
                coefficient = alpha * reward
            for key in theta.keys():
                theta[key] += coefficient * basis[key]
            if game_end:
                if winner is None:
                    print("DRAW!")
                elif winner == agent_player:
                    print("WON!")
                elif winner == opp_player:
                    print("LOST!")
                break
            curr_player = next_player[curr_player]
    return theta