def test_subgoals():
    """Check the state count and the subgoals produced by a CombinationLock.

    The lock built here must report 9 states and create subgoals equal to
    ``Subgoal(2)``, ``Subgoal(5)`` and ``Subgoal(8)``, in that order.
    """
    lock = CombinationLock(3, 3, 6, 0.0, [1, 5, 4])

    assert lock.num_states() == 9

    # Expected subgoals are built on the right-hand side so they are
    # constructed only after create_subgoals() has been called.
    assert lock.create_subgoals() == [Subgoal(state) for state in (2, 5, 8)]
def test_subgoals():
    """Verify num_states() and create_subgoals() on a CombinationLock.

    Expects 9 states and the subgoal list
    ``[Subgoal(2), Subgoal(5), Subgoal(8)]``.
    """
    lock = CombinationLock(3, 3, 6, 0.0, [1, 5, 4])

    state_count = lock.num_states()
    assert state_count == 9

    # Create the actual subgoals first, then the expected ones, so the order
    # of constructor calls is unchanged.
    actual = lock.create_subgoals()
    expected = [Subgoal(2), Subgoal(5), Subgoal(8)]
    assert actual == expected
Example #3
0
def main(argv):
    """Execute experiment.

    Parses ``argv``, seeds ``random``, configures logging, then builds the
    environment, function approximator and agent selected by the parsed
    arguments and runs an ``Experiment2`` with them.

    Args:
        argv: Argument list handed to ``parse_args``.

    Raises:
        ValueError: If ``args.environment`` is not ``'gridworld'``,
            ``'gridworld_continuous'`` or ``'combo_lock'``.
    """
    args = parse_args(argv)
    random.seed(args.seed)
    logging.basicConfig(level=log_level(args.log_level))

    # Explicit branches instead of a ``cond and obj or ...`` chain: that idiom
    # evaluates to False for an unknown name (and skips any falsy object),
    # which only showed up later as an AttributeError on a bool.
    # NOTE(review): Gridworld receives (width, height) while CombinationLock
    # receives (height, width) -- confirm against the constructors.
    if args.environment == 'gridworld':
        environment = Gridworld(args.gridworld_width, args.gridworld_height,
                                args.failure_rate)
    elif args.environment == 'gridworld_continuous':
        environment = GridworldContinuous(0.2, 0.01)
    elif args.environment == 'combo_lock':
        environment = CombinationLock(args.gridworld_height,
                                      args.gridworld_width, 4,
                                      args.failure_rate)
    else:
        raise ValueError(
            'Unknown environment: {!r}'.format(args.environment))

    # Any agent_policy other than 'random' keeps the False placeholder the
    # original expression produced; agent.policy is replaced below regardless.
    policy = (RandomPolicy(environment.num_actions)
              if args.agent_policy == 'random' else False)

    # 'gridworld' and 'combo_lock' use the tabular approximator; the only
    # remaining valid environment, 'gridworld_continuous', uses RBF.
    is_discrete = args.environment in ('gridworld', 'combo_lock')
    if is_discrete:
        fa = TabularFA(environment.num_states(), environment.num_actions)
    else:
        fa = RBF(2, 5, environment.num_actions, beta=args.beta)

    agent = Agent(policy,
                  fa,
                  environment.num_actions,
                  args.alpha,
                  args.gamma,
                  args.eta,
                  args.zeta,
                  args.epsilon,
                  args.num_vi,
                  args.sim_samples,
                  args.sim_steps,
                  retain_theta=args.retain_theta,
                  subgoals=environment.create_subgoals())
    # The option policy needs the agent itself, so it is attached after
    # construction.
    agent.policy = RandomOptionPolicy(agent, args.random_options)
    if args.agent_viz:
        agent.create_visualization(is_discrete, environment)

    # Disabled calls retained from the original source:
    # agent.exploit(np.asarray([1, 1]))
    # agent.exploit(environment.num_states()-1)
    # results_descriptor = ResultsDescriptor(args.results_interval, args.results_path, ['interval_id', 'steps'])
    # experiment_descriptor = ExperimentDescriptor(args.plan_interval, args.num_steps)
    # start(experiment_descriptor, agent, environment, results_descriptor)
    experiment = Experiment2(agent, environment, args.plan_interval,
                             args.num_steps, args.viz_steps)
    experiment.run()
def test_grid_position():
    """Are the grid positions correct?

    State 2 must map to ``GridPosition(2, 0)`` and state 3 to
    ``GridPosition(0, 1)``.
    """
    # The failure rate goes before the combination, as in every other
    # CombinationLock construction in this file; the two were swapped here.
    environment = CombinationLock(3, 3, 6, 0.0, [1, 5, 4])
    assert environment.grid_position_from_state(2) == GridPosition(2, 0)
    assert environment.grid_position_from_state(3) == GridPosition(0, 1)
def test_taking_correct_actions():
    """Check action histories, state transitions and terminal detection.

    Uses a CombinationLock built with a 0.0 fourth argument and the sequence
    [1, 5, 4], and walks it through actions 1, 5, 4 and then 4 again.
    """
    random.seed()
    lock = CombinationLock(3, 3, 6, 0.0, [1, 5, 4])

    # State k is expected to report the first k entries of [1, 5, 4].
    sequence = [1, 5, 4]
    for state in range(4):
        assert lock.actions_from_state(state) == sequence[:state]

    assert not lock.is_terminal([])

    # Deterministic tests: the first two actions advance one state each and
    # neither resulting action list is terminal.
    current = lock.initial_state()
    for action, expected_state in ((1, 1), (5, 2)):
        current = lock.next_state(current, action)
        assert current == expected_state
        assert not lock.is_terminal(lock.actions_from_state(current))

    after_third = lock.next_state(current, 4)
    assert after_third == 3

    # Appending action 2 to the action list of state 3 must be terminal.
    assert lock.is_terminal(lock.actions_from_state(after_third) + [2])

    # Taking action 4 from state 3 is expected to give state 0.
    after_fourth = lock.next_state(after_third, 4)
    assert after_fourth == 0
    assert not lock.is_terminal(lock.actions_from_state(after_fourth))

    assert lock.is_terminal([2])
def test_grid_position():
    """Are the grid positions correct?

    State 2 must map to ``GridPosition(2, 0)`` and state 3 to
    ``GridPosition(0, 1)``.
    """
    # The failure rate goes before the combination, as in every other
    # CombinationLock construction in this file; the two were swapped here.
    environment = CombinationLock(3, 3, 6, 0.0, [1, 5, 4])
    assert environment.grid_position_from_state(2) == GridPosition(2, 0)
    assert environment.grid_position_from_state(3) == GridPosition(0, 1)
def test_taking_correct_actions():
    """Exercise actions_from_state, next_state and is_terminal on a lock.

    The CombinationLock is built with a 0.0 fourth argument and the sequence
    [1, 5, 4]; the test then applies actions 1, 5, 4 and 4 in turn.
    """
    random.seed()
    lock = CombinationLock(3, 3, 6, 0.0, [1, 5, 4])

    # Expected action list for each of the states 0..3.
    expected_histories = ([], [1], [1, 5], [1, 5, 4])
    for state, history in enumerate(expected_histories):
        assert lock.actions_from_state(state) == history

    empty_is_terminal = lock.is_terminal([])
    assert not empty_is_terminal

    # Deterministic tests
    first = lock.next_state(lock.initial_state(), 1)
    assert first == 1
    first_history = lock.actions_from_state(first)
    assert not lock.is_terminal(first_history)

    second = lock.next_state(first, 5)
    assert second == 2
    second_history = lock.actions_from_state(second)
    assert not lock.is_terminal(second_history)

    third = lock.next_state(second, 4)
    assert third == 3

    # The action list of state 3 followed by action 2 must be terminal.
    third_history = lock.actions_from_state(third)
    assert lock.is_terminal(third_history + [2])

    # A further action 4 from state 3 is expected to give state 0.
    fourth = lock.next_state(third, 4)
    assert fourth == 0
    fourth_history = lock.actions_from_state(fourth)
    assert not lock.is_terminal(fourth_history)

    assert lock.is_terminal([2])