def test_subgoals():
    """Check the state count and the subgoal list of a small CombinationLock."""
    # NOTE(review): another test_subgoals is defined later in this file; the
    # later definition replaces this one when the module is imported.
    lock = CombinationLock(3, 3, 6, 0.0, [1, 5, 4])
    assert lock.num_states() == 9

    subgoals = lock.create_subgoals()
    assert subgoals == [Subgoal(state) for state in (2, 5, 8)]
def test_subgoals():
    """Verify num_states() and create_subgoals() for a CombinationLock(3, 3, 6, ...)."""
    combo_lock = CombinationLock(3, 3, 6, 0.0, [1, 5, 4])

    state_count = combo_lock.num_states()
    assert state_count == 9

    # Subgoals are expected at states 2, 5 and 8, in that order.
    created = combo_lock.create_subgoals()
    expected = [Subgoal(2), Subgoal(5), Subgoal(8)]
    assert created == expected
def main(argv):
    """Execute experiment.

    Parses ``argv``, seeds the random number generator, configures logging,
    builds the environment, function approximator and agent selected by the
    parsed arguments, and runs an ``Experiment2`` with them.

    Args:
        argv: Command-line arguments, forwarded unchanged to ``parse_args``.

    Raises:
        ValueError: If ``args.environment`` is not ``'gridworld'``,
            ``'gridworld_continuous'`` or ``'combo_lock'``.
    """
    args = parse_args(argv)
    random.seed(args.seed)
    logging.basicConfig(level=log_level(args.log_level))

    # Explicit branches instead of ``cond and obj or ...``: that idiom falls
    # through to the next alternative whenever ``obj`` is falsy and yields
    # ``False`` when nothing matches, which only surfaces later as an
    # AttributeError on ``environment.num_actions``.
    if args.environment == 'gridworld':
        environment = Gridworld(args.gridworld_width, args.gridworld_height,
                                args.failure_rate)
    elif args.environment == 'gridworld_continuous':
        environment = GridworldContinuous(0.2, 0.01)
    elif args.environment == 'combo_lock':
        # NOTE(review): height comes before width here, while Gridworld above
        # receives width first -- confirm against both constructors.
        environment = CombinationLock(args.gridworld_height,
                                      args.gridworld_width, 4,
                                      args.failure_rate)
    else:
        raise ValueError(
            'Unknown environment: {!r}'.format(args.environment))

    # When the policy is not 'random' the placeholder stays ``False`` exactly
    # as before; ``agent.policy`` is replaced right after construction anyway.
    if args.agent_policy == 'random':
        policy = RandomPolicy(environment.num_actions)
    else:
        policy = False

    # Both discrete environments use the tabular approximator; the only
    # remaining choice after the check above is 'gridworld_continuous'.
    is_tabular = args.environment in ('gridworld', 'combo_lock')
    if is_tabular:
        fa = TabularFA(environment.num_states(), environment.num_actions)
    else:
        fa = RBF(2, 5, environment.num_actions, beta=args.beta)

    agent = Agent(policy, fa, environment.num_actions, args.alpha,
                  args.gamma, args.eta, args.zeta, args.epsilon, args.num_vi,
                  args.sim_samples, args.sim_steps,
                  retain_theta=args.retain_theta,
                  subgoals=environment.create_subgoals())
    agent.policy = RandomOptionPolicy(agent, args.random_options)

    if args.agent_viz:
        agent.create_visualization(is_tabular, environment)

    # agent.exploit(np.asarray([1, 1]))
    # agent.exploit(environment.num_states()-1)
    # results_descriptor = ResultsDescriptor(args.results_interval, args.results_path, ['interval_id', 'steps'])
    # experiment_descriptor = ExperimentDescriptor(args.plan_interval, args.num_steps)
    # start(experiment_descriptor, agent, environment, results_descriptor)
    experiment = Experiment2(agent, environment, args.plan_interval,
                             args.num_steps, args.viz_steps)
    experiment.run()
def test_grid_position():
    """Are the grid positions correct?

    Checks that state 2 maps to ``GridPosition(2, 0)`` and state 3 maps to
    ``GridPosition(0, 1)`` on a ``CombinationLock(3, 3, 6, ...)``.
    """
    # The failure rate is the 4th positional argument and the combination list
    # the 5th, matching every other CombinationLock call in this file; the two
    # were previously passed in swapped order.
    environment = CombinationLock(3, 3, 6, 0.0, [1, 5, 4])
    assert environment.grid_position_from_state(2) == GridPosition(2, 0)
    assert environment.grid_position_from_state(3) == GridPosition(0, 1)
def test_taking_correct_actions():
    """Step a CombinationLock through actions 1, 5, 4, 4 and check each result.

    The lock is built with a failure rate of 0.0, so only the deterministic
    transitions are exercised here.
    """
    # NOTE(review): another test_taking_correct_actions is defined later in
    # this file; the later definition replaces this one at import time.
    random.seed()
    lock = CombinationLock(3, 3, 6, 0.0, [1, 5, 4])

    # Action history reported for each of the first four states.
    for state, history in ((0, []), (1, [1]), (2, [1, 5]), (3, [1, 5, 4])):
        assert lock.actions_from_state(state) == history
    assert not lock.is_terminal([])

    # Deterministic tests
    after_first = lock.next_state(lock.initial_state(), 1)
    assert after_first == 1
    assert not lock.is_terminal(lock.actions_from_state(after_first))

    after_second = lock.next_state(after_first, 5)
    assert after_second == 2
    assert not lock.is_terminal(lock.actions_from_state(after_second))

    after_third = lock.next_state(after_second, 4)
    assert after_third == 3
    # Appending action 2 to the history of state 3 is asserted to be terminal.
    assert lock.is_terminal(lock.actions_from_state(after_third) + [2])

    # One more action from state 3 is expected to lead back to state 0.
    after_fourth = lock.next_state(after_third, 4)
    assert after_fourth == 0
    assert not lock.is_terminal(lock.actions_from_state(after_fourth))
    assert lock.is_terminal([2])
def test_taking_correct_actions():
    """Check state transitions, action histories and terminal detection.

    Uses a CombinationLock created with failure rate 0.0, so every transition
    checked below is deterministic.
    """
    random.seed()
    combo_lock = CombinationLock(3, 3, 6, 0.0, [1, 5, 4])

    expected_histories = [[], [1], [1, 5], [1, 5, 4]]
    for state, expected in enumerate(expected_histories):
        assert combo_lock.actions_from_state(state) == expected
    assert not combo_lock.is_terminal([])

    # Deterministic tests
    start = combo_lock.initial_state()
    s1 = combo_lock.next_state(start, 1)
    assert s1 == 1
    history_1 = combo_lock.actions_from_state(s1)
    assert not combo_lock.is_terminal(history_1)

    s2 = combo_lock.next_state(s1, 5)
    assert s2 == 2
    history_2 = combo_lock.actions_from_state(s2)
    assert not combo_lock.is_terminal(history_2)

    s3 = combo_lock.next_state(s2, 4)
    assert s3 == 3
    history_3 = combo_lock.actions_from_state(s3)
    # The history of state 3 followed by action 2 is asserted to be terminal.
    assert combo_lock.is_terminal(history_3 + [2])

    s4 = combo_lock.next_state(s3, 4)
    assert s4 == 0
    history_4 = combo_lock.actions_from_state(s4)
    assert not combo_lock.is_terminal(history_4)
    assert combo_lock.is_terminal([2])