def test1():
    """Run a single episode of an RL agent against a random opponent.

    An ``Environment`` is built around ``RandomPlayer('O')``, an
    ``RLPlayer('X')`` is created as the agent, and one ``Episode`` is
    executed starting from ``p.empty_board()``.

    Returns:
        The two values produced by ``Episode.execute``, as a tuple
        ``(agent, final_board)``.
    """
    # Construction order is kept as: opponent/environment first, agent second.
    rival = RandomPlayer('O')
    world = Environment(rival)
    learner = RLPlayer('X')
    run = Episode(learner, world)

    start = p.empty_board()
    outcome = run.execute(start)

    # Unpack explicitly so a result that is not a pair fails loudly here.
    trained, end_board = outcome
    return trained, end_board
def play_game(p1, p2, file=None):
    """Play one game between two player objects, ``p1`` moving first.

    Each turn the current player is asked for a move via
    ``pick_next_move(board)``; the move's first two items are passed to
    ``p.add_move`` and the resulting board is shown with
    ``p.display_board``. The loop ends when ``p.is_cat_game`` reports true
    for the board (checked before the move) or when ``p.is_winner`` reports
    true for the player who just moved.

    Args:
        p1: Player object exposing ``player`` and ``pick_next_move``.
        p2: Second player object with the same attributes.
        file: Optional object with a ``write`` method. When given, the
            board before every move and the final result are written to it.

    Returns:
        The winning player object, or ``None`` when the game ended without
        a winner.
    """
    # NOTE(review): this calls a bare ``to_display_string``; confirm it is
    # in scope at module level.
    logging_to_file = file is not None
    contenders = [p1, p2]
    turn = 0
    moves_made = 0
    victor = None
    grid = p.empty_board()

    while True:
        mover = contenders[turn]
        print("Current move is for player: ", mover.player)

        if logging_to_file:
            file.write("PRIOR TO MOVE " + str(moves_made) + " ------------\n")
            file.write(to_display_string(grid))

        # The cat-game check happens before the current player moves.
        if p.is_cat_game(grid):
            if logging_to_file:
                file.write("RESULT IS CAT GAME")
            break

        choice = mover.pick_next_move(grid)
        grid = p.add_move(choice[0], choice[1], grid)
        p.display_board(grid)
        moves_made += 1

        # Only the player who just moved is checked for a win.
        if p.is_winner(grid, mover.player):
            victor = mover
            if logging_to_file:
                file.write("FINAL BOARD AFTER MOVE " + str(moves_made) + " WINNER IS: " + victor.player + "\n")
                file.write(to_display_string(grid))
            break

        # Hand the turn to the other player.
        if turn == 0:
            print("Switching to player 1...")
            turn = 1
        else:
            print("Switching to player 0...")
            turn = 0

    if victor is not None:
        print("WINNER IS PLAYER: ", victor.player)
    else:
        print("CAT GAME")
    return victor
def generate_tic_tac_toe_episode(policy):
    """Generate one episode under ``policy`` and return its transitions.

    Starting from ``p.empty_board()``, an action is drawn each step with
    ``sample_tic_tac_toe_policy`` and handed to ``model_environment``
    together with a ``GreedyRandomPlayer('O')`` opponent. The loop stops
    once ``model_environment`` reports the game as complete.

    Progress is printed and also written to a ``file`` object that is NOT a
    parameter of this function.
    NOTE(review): ``file`` must therefore exist at module level with a
    ``write`` method - confirm against the rest of the module.

    Args:
        policy: Passed through unchanged to ``sample_tic_tac_toe_policy``.

    Returns:
        A list of dicts, one per step, with the keys ``'from_state'``,
        ``'to_state'``, ``'action'`` and ``'reward'`` (in that order).
    """
    current_board = p.empty_board()
    opponent = GreedyRandomPlayer('O')
    game_complete = False
    transitions = []

    while not game_complete:
        print("PRIOR TO MOVE:")
        # Render once and reuse the text for both the console and the log.
        board_text = game.to_display_string(current_board)
        print(board_text)
        file.write("PRIOR TO MOVE:\n")
        file.write(board_text)

        selected_action = sample_tic_tac_toe_policy(current_board, policy)

        # model the environment --> returns a next state and a reward
        next_state, reward, game_complete = model_environment(
            opponent, current_board, selected_action
        )

        # Key order is kept so the logged str(transition) is unchanged.
        transition = {
            'from_state': current_board,
            'to_state': next_state,
            'action': selected_action,
            'reward': reward,
        }
        transitions.append(transition)
        file.write("ADDING TRANSITION: " + str(transition) + "\n")

        current_board = next_state

    print("BOARD AT END OF EPISODE")
    final_text = game.to_display_string(current_board)
    print(final_text)
    file.write("BOARD AT END OF EPISODE\n")
    file.write(final_text)
    return transitions
def test_sample_policy():
    """Sample one move from an empty policy on an empty board and print it.

    Calls ``sample_tic_tac_toe_policy`` with ``p.empty_board()`` and an
    empty dict as the policy, then prints the result. Returns nothing.
    """
    start = p.empty_board()
    blank_policy = {}
    chosen = sample_tic_tac_toe_policy(start, blank_policy)
    print("SELECTED MOVE: ", chosen)