# Example no. 1
def _test_environments():
    """Smoke-test the environment classes.

    First builds an EnvMaze directly and prints a short action/state
    transcript, then iterates over every named environment and exercises
    the common interface (reset / performAction / getReward / isFinished).

    Output goes to stdout; returns None.
    """
    from environments.env_maze import EnvMaze

    # Maze layout; presumably 'T' = terminal, '.' = free cell, 'W' = wall
    # -- confirm against EnvMaze.
    char_grid = [
        ['T', '.', '.', '.', '.'],
        ['.', '.', '.', '.', '.'],
        ['.', '.', '.', '.', '.'],
        ['W', 'W', '.', 'W', 'W'],
        ['.', '.', '.', '.', '.'],
        ['.', '.', '.', '.', '.'],
    ]
    stochasticity = 0.1  # presumably probability of a random move -- confirm
    bump_penalty = -10   # presumably reward for walking into a wall -- confirm
    maze = EnvMaze(char_grid, stochasticity, bump_penalty)

    maze.reset()
    print(maze.stateString())
    maze.performAction('LEFT')
    print(maze.stateString())
    maze.performAction('UP')
    print(maze.stateString())

    from get_environment_from_name import get_environment_from_name

    names = ['FlipCoin', 'FlipTwoCoins', 'Grid', 'Maze']

    for env_name in names:
        print('____________________________________________')
        print(env_name)

        env = get_environment_from_name(env_name)
        # Query isFinished() before reset() to observe the initial state.
        is_finished = env.isFinished()
        print(is_finished)

        env.reset()
        print(env)
        action = 0
        env.performAction(action)
        print('')
        print(env)
        reward = env.getReward()
        print(reward)
        is_finished = env.isFinished()
        print(is_finished)

        # Grid also accepts string-named actions; exercise that path too.
        if env_name == 'Grid':
            env.performAction('UP')
    # NOTE(review): the original ended with
    #     learning_curves[agent_name] = mean_curves
    #     return learning_curves
    # referencing three names never defined in this function (NameError on
    # the first loop pass) -- a leftover paste from another function; removed.


if __name__ == '__main__':

    import sys
    if (len(sys.argv) < 2):
        env_name = 'Maze'
    else:
        env_name = sys.argv[1]

    # Initialize an environment
    from get_environment_from_name import get_environment_from_name
    env = get_environment_from_name(env_name)
    n_states = env.numStates()
    n_actions = env.numActions()

    # Make a dictionary, and fill it with agents
    agents_ = {}

    # from agents.agent_random import AgentRandom
    # agents_['Random'] = AgentRandom(n_states, n_actions)

    # from agents.agent_first_action import AgentFirstAction
    # agents_['FirstAction'] = AgentFirstAction(n_states, n_actions)

    from agents.agent_montecarlo_statevalues import AgentMonteCarloV
    agents_['MonteCarlo()'] = AgentMonteCarloV(n_states, n_actions)
    # for alpha in np.linspace(0.01, 0.3, num=3):