def _test_environments():
    """Manually smoke-test the environment implementations.

    First drives an ``EnvMaze`` instance through a reset and two actions,
    printing the rendered state after each step.  Then instantiates every
    environment known to ``get_environment_from_name`` and runs a single
    step on each, printing state, reward, and termination flag along the
    way.  Output is meant for eyeballing, not asserting.
    """
    # NOTE: the bare `import environments` the original had was unused
    # (only the submodule import below is needed) and has been dropped.
    from environments.env_maze import EnvMaze

    # Maze layout: 'T' = terminal cell, '.' = open cell, 'W' = wall.
    char_grid = [
        ['T', '.', '.', '.', '.'],
        ['.', '.', '.', '.', '.'],
        ['.', '.', '.', '.', '.'],
        ['W', 'W', '.', 'W', 'W'],
        ['.', '.', '.', '.', '.'],
        ['.', '.', '.', '.', '.'],
    ]
    # Presumably: chance an action is randomized, and the reward for
    # bumping a wall — TODO confirm against EnvMaze's constructor docs.
    stochasticity = 0.1
    bump_penalty = -10

    maze = EnvMaze(char_grid, stochasticity, bump_penalty)
    maze.reset()
    print(maze.stateString())
    # Same action/print sequence as before, deduplicated into a loop.
    for maze_action in ('LEFT', 'UP'):
        maze.performAction(maze_action)
        print(maze.stateString())

    # Exercise each named environment through the factory.
    from get_environment_from_name import get_environment_from_name
    names = ['FlipCoin', 'FlipTwoCoins', 'Grid', 'Maze']
    for env_name in names:
        print('____________________________________________')
        print(env_name)
        env = get_environment_from_name(env_name)
        is_finished = env.isFinished()
        print(is_finished)
        env.reset()
        print(env)
        action = 0
        env.performAction(action)
        print('')
        print(env)
        reward = env.getReward()
        print(reward)
        is_finished = env.isFinished()
        print(is_finished)
        # Grid additionally accepts string actions; poke that code path too.
        if env_name == 'Grid':
            env.performAction('UP')
learning_curves[agent_name] = mean_curves return learning_curves if __name__ == '__main__': import sys if (len(sys.argv) < 2): env_name = 'Maze' else: env_name = sys.argv[1] # Initialize an environment from get_environment_from_name import get_environment_from_name env = get_environment_from_name(env_name) n_states = env.numStates() n_actions = env.numActions() # Make a dictionary, and fill it with agents agents_ = {} # from agents.agent_random import AgentRandom # agents_['Random'] = AgentRandom(n_states, n_actions) # from agents.agent_first_action import AgentFirstAction # agents_['FirstAction'] = AgentFirstAction(n_states, n_actions) from agents.agent_montecarlo_statevalues import AgentMonteCarloV agents_['MonteCarlo()'] = AgentMonteCarloV(n_states, n_actions) # for alpha in np.linspace(0.01, 0.3, num=3):