Example #1
0
def test_discrete_gridworld_experiment():
    """Run a short experiment with a tabular agent on a 3x3 discrete gridworld.

    The test contains no assertions; it passes when ``start`` returns
    without raising.
    """
    world = Gridworld(3, 3, 0.0)
    # presumably (plan_interval, num_steps) -- verify against ExperimentDescriptor
    descriptor = ExperimentDescriptor(5, 100)

    behaviour_policy = RandomPolicy(world.num_actions)
    q_table = TabularFA(world.num_states(), world.num_actions)
    # The numeric arguments are passed positionally, exactly as before.
    # NOTE(review): confirm their meaning against Agent's signature.
    learner = Agent(
        behaviour_policy,
        q_table,
        world.num_actions,
        0.1,
        0.99,
        0.1,
        0.1,
        0.05,
        10,
        0,
        0,
    )

    # Results are written to 'results.txt' in the current working directory.
    output_file = os.path.join(os.getcwd(), 'results.txt')
    reporting = ResultsDescriptor(100, output_file, ['interval_id', 'steps'])

    start(descriptor, learner, world, reporting)
Example #2
0
def test_discrete_gridworld_experiment():
    """Exercise ``start`` end to end on a 3x3 tabular, discrete gridworld.

    Nothing is asserted about the outcome; the test only requires that the
    whole run completes without an exception.
    """
    grid = Gridworld(3, 3, 0.0)
    run_spec = ExperimentDescriptor(5, 100)

    # Positional numeric arguments forwarded to Agent unchanged, in the
    # original order. NOTE(review): meanings are defined by Agent's signature.
    numeric_settings = (0.1, 0.99, 0.1, 0.1, 0.05, 10, 0, 0)
    random_policy = RandomPolicy(grid.num_actions)
    table_fa = TabularFA(grid.num_states(), grid.num_actions)
    tabular_agent = Agent(random_policy, table_fa, grid.num_actions,
                          *numeric_settings)

    # The results file lives in whatever directory the test is run from.
    results_file = os.path.join(os.getcwd(), 'results.txt')
    columns = ['interval_id', 'steps']
    results_spec = ResultsDescriptor(100, results_file, columns)

    start(run_spec, tabular_agent, grid, results_spec)
Example #3
0
def main(argv):
    """Execute experiment.

    Parses ``argv``, seeds the ``random`` module, configures logging, builds
    the environment, function approximator and agent selected by the parsed
    arguments, and finally runs an ``Experiment2``.

    Args:
        argv: Command-line arguments, forwarded unchanged to ``parse_args``.

    Raises:
        ValueError: If ``args.environment`` is not one of ``'gridworld'``,
            ``'gridworld_continuous'`` or ``'combo_lock'``.
    """
    args = parse_args(argv)
    random.seed(args.seed)
    logging.basicConfig(level=log_level(args.log_level))

    # Explicit branches replace the former ``cond and value or fallback``
    # chain. That idiom silently falls through when the constructed object is
    # falsy and leaves ``False`` behind for an unknown name, which previously
    # surfaced only later as an unrelated AttributeError.
    if args.environment == 'gridworld':
        environment = Gridworld(args.gridworld_width, args.gridworld_height,
                                args.failure_rate)
    elif args.environment == 'gridworld_continuous':
        environment = GridworldContinuous(0.2, 0.01)
    elif args.environment == 'combo_lock':
        # NOTE(review): height is passed before width here, unlike Gridworld
        # above -- confirm this matches CombinationLock's signature.
        environment = CombinationLock(args.gridworld_height,
                                      args.gridworld_width, 4,
                                      args.failure_rate)
    else:
        raise ValueError(
            'Unsupported environment: {!r}'.format(args.environment))

    # Both discrete environments share the tabular code paths below.
    is_tabular = args.environment in ('gridworld', 'combo_lock')

    # Any policy name other than 'random' yields False, as before; the agent's
    # policy attribute is replaced with a RandomOptionPolicy further down.
    policy = (RandomPolicy(environment.num_actions)
              if args.agent_policy == 'random' else False)

    if is_tabular:
        fa = TabularFA(environment.num_states(), environment.num_actions)
    else:
        # Only 'gridworld_continuous' can reach this branch.
        fa = RBF(2, 5, environment.num_actions, beta=args.beta)

    agent = Agent(policy,
                  fa,
                  environment.num_actions,
                  args.alpha,
                  args.gamma,
                  args.eta,
                  args.zeta,
                  args.epsilon,
                  args.num_vi,
                  args.sim_samples,
                  args.sim_steps,
                  retain_theta=args.retain_theta,
                  subgoals=environment.create_subgoals())
    agent.policy = RandomOptionPolicy(agent, args.random_options)
    if args.agent_viz:
        agent.create_visualization(is_tabular, environment)

    # NOTE(review): disabled calls kept from the original; remove them if they
    # are no longer needed.
    # agent.exploit(np.asarray([1, 1]))
    # agent.exploit(environment.num_states()-1)
    # results_descriptor = ResultsDescriptor(args.results_interval, args.results_path, ['interval_id', 'steps'])
    # experiment_descriptor = ExperimentDescriptor(args.plan_interval, args.num_steps)
    # start(experiment_descriptor, agent, environment, results_descriptor)
    e = Experiment2(agent, environment, args.plan_interval, args.num_steps,
                    args.viz_steps)
    e.run()
Example #4
0
def test_termination():
    """Check ``is_terminal`` at grid positions (0, 0) and (3, 3) of a 4x4 gridworld."""
    world = Gridworld(4, 4, 0)

    # The state at (0, 0) must not be reported as terminal.
    state_0_0 = world.state_from_grid_position(GridPosition(0, 0))
    assert not world.is_terminal(state_0_0)

    # The state at (3, 3) must be reported as terminal.
    state_3_3 = world.state_from_grid_position(GridPosition(3, 3))
    assert world.is_terminal(state_3_3)
Example #5
0
def test_taking_actions():
    """Check the state returned by ``next_state`` for each action.

    The four actions are first checked with a failure rate of 0.0, then the
    'left' and 'up' actions are sampled 10000 times each with a failure rate
    of 0.1.
    """
    random.seed()
    world = Gridworld(4, 4, 0.0)

    def state_at(first, second):
        # State that the environment associates with GridPosition(first, second).
        return world.state_from_grid_position(GridPosition(first, second))

    def is_state_0_0(state):
        # Predicate handed to ratio_test; the target state is recomputed per call.
        return state == state_at(0, 0)

    # Deterministic tests: each action paired with the expected grid position.
    expected_successors = (
        (Action.up, (0, 1)),
        (Action.down, (0, 0)),
        (Action.left, (0, 0)),
        (Action.right, (1, 0)),
    )
    for action, coords in expected_successors:
        assert world.next_state(world.initial_state(), action) == state_at(*coords)

    # Stochastic tests: same environment, now with a non-zero failure rate.
    world.failure_rate = 0.1
    sample_count = 10000

    left_sampler = partial(world.next_state, world.initial_state(), Action.left)
    assert ratio_test(is_state_0_0, left_sampler, sample_count) == 1.0

    up_sampler = partial(world.next_state, world.initial_state(), Action.up)
    ratio = ratio_test(is_state_0_0, up_sampler, sample_count)
    # NOTE(review): the RNG is reseeded without a fixed seed above, so this
    # tolerance check may occasionally fail by chance -- consider a fixed seed
    # if flakiness shows up.
    assert 0.09 < ratio < 0.11