def test_tfa_runtime():
  params = ParameterServer(
    filename="tests/data/deterministic_scenario_test.json")
  scenario_generation = DeterministicScenarioGeneration(num_scenarios=3,
                                                        random_seed=0,
                                                        params=params)
  state_observer = ClosestAgentsObserver(params=params)
  action_wrapper = DynamicModel(params=params)
  evaluator = GoalReached(params=params)
  viewer = MPViewer(params=params,
                    x_range=[-30, 30],
                    y_range=[-20, 40],
                    follow_agent_id=True)  # use_world_bounds=True
  runtimerl = RuntimeRL(action_wrapper=action_wrapper,
                        observer=state_observer,
                        evaluator=evaluator,
                        step_time=0.05,
                        viewer=viewer,
                        scenario_generator=scenario_generation)
  tfa_env = TFAWrapper(runtimerl)
  _ = tfa_env.reset()
  utils.validate_py_environment(tfa_env, episodes=5)
  _ = tf_py_environment.TFPyEnvironment(tfa_env)
def test_motion_primitives_concat_state(self):
  params = ParameterServer(
    filename="tests/data/deterministic_scenario_test.json")
  scenario_generation = DeterministicScenarioGeneration(num_scenarios=3,
                                                        random_seed=0,
                                                        params=params)
  state_observer = SimpleObserver(params=params)
  action_wrapper = MotionPrimitives(params=params)
  evaluator = GoalReached(params=params)
  viewer = MPViewer(params=params,
                    x_range=[-30, 30],
                    y_range=[-40, 40],
                    use_world_bounds=True)
  runtimerl = RuntimeRL(action_wrapper=action_wrapper,
                        observer=state_observer,
                        evaluator=evaluator,
                        step_time=0.2,
                        viewer=viewer,
                        scenario_generator=scenario_generation,
                        render=False)
  for _ in range(0, 3):
    runtimerl.reset()
    for _ in range(0, 50):  # run each scenario for up to 50 steps
      action = action_wrapper.action_space.sample()
      next_observed_state, reward, done, info = runtimerl.step(action)
      if done:
        print("State: {}\nReward: {}\nDone: {}, Info: {}\n"
              "=================================================".format(
                next_observed_state, reward, done, info))
        break
def test_runner():
  params = ParameterServer(
    filename="tests/data/deterministic_scenario_test.json")
  base_dir = os.path.dirname(os.path.dirname(__file__))
  params["BaseDir"] = base_dir
  scenario_generation = DeterministicScenarioGeneration(num_scenarios=3,
                                                        random_seed=0,
                                                        params=params)
  state_observer = ClosestAgentsObserver(params=params)
  action_wrapper = DynamicModel(params=params)
  evaluator = GoalReached(params=params)
  viewer = MPViewer(params=params,
                    x_range=[-30, 30],
                    y_range=[-20, 40],
                    follow_agent_id=True)
  runtimerl = RuntimeRL(action_wrapper=action_wrapper,
                        observer=state_observer,
                        evaluator=evaluator,
                        step_time=0.2,
                        viewer=viewer,
                        scenario_generator=scenario_generation,
                        render=False)
  tfa_env = tf_py_environment.TFPyEnvironment(TFAWrapper(runtimerl))
  sac_agent = SACAgent(tfa_env, params=params)
  tfa_runner = TFARunner(tfa_env,
                         sac_agent,
                         params=params,
                         unwrapped_runtime=runtimerl)
  tfa_runner.collect_initial_episodes()
  # main functionalities
  tfa_runner.train()
  tfa_runner.visualize()
  tfa_runner.evaluate()
def test_agent(self):
  params = ParameterServer(
    filename="tests/data/deterministic_scenario_test.json")
  base_dir = os.path.dirname(os.path.dirname(__file__))
  params["BaseDir"] = base_dir
  scenario_generation = DeterministicScenarioGeneration(num_scenarios=2,
                                                        random_seed=0,
                                                        params=params)
  state_observer = ClosestAgentsObserver(params=params)
  action_wrapper = DynamicModel(params=params)
  evaluator = GoalReached(params=params)
  viewer = MPViewer(params=params,
                    x_range=[-30, 30],
                    y_range=[-20, 40],
                    follow_agent_id=True)  # use_world_bounds=True
  runtimerl = RuntimeRL(action_wrapper=action_wrapper,
                        observer=state_observer,
                        evaluator=evaluator,
                        step_time=0.05,
                        viewer=viewer,
                        scenario_generator=scenario_generation)
  tfa_env = tf_py_environment.TFPyEnvironment(TFAWrapper(runtimerl))
  sac_agent = SACAgent(tfa_env, params=params)
  self.assertEqual(sac_agent._agent.name, "sac_agent")
  sac_agent.reset()
  # TODO(@hart): does not work because of read-only file-system
  # sac_agent.save()
  sac_agent.load()
def test_runtime_rl(self):
  params = ParameterServer(
    filename="tests/data/deterministic_scenario_test.json")
  scenario_generation = DeterministicScenarioGeneration(num_scenarios=2,
                                                        random_seed=0,
                                                        params=params)
  state_observer = SimpleObserver(params=params)
  action_wrapper = DynamicModel(params=params)
  evaluator = GoalReached(params=params)
  viewer = MPViewer(params=params,
                    x_range=[-30, 30],
                    y_range=[-40, 40],
                    use_world_bounds=True)
  runtimerl = RuntimeRL(action_wrapper=action_wrapper,
                        observer=state_observer,
                        evaluator=evaluator,
                        step_time=0.2,
                        viewer=viewer,
                        scenario_generator=scenario_generation,
                        render=False)
  start_time = time.time()
  for _ in range(0, 100):
    runtimerl.reset()
    done = False
    reward = 0.
    for _ in range(0, 50):  # run each scenario for up to 50 steps
      # scale down the sampled action so the agent roughly drives straight
      action = action_wrapper.action_space.sample() / 100
      next_observed_state, reward, done, info = runtimerl.step(action)
      # observer: concatenated, normalized states of the surrounding agents
      self.assertEqual(len(next_observed_state), 16)
      np.testing.assert_array_equal(
        next_observed_state[0:4],
        state_observer._normalize(runtimerl._world.agents[100].state)[1:5])
      np.testing.assert_array_equal(
        next_observed_state[4:8],
        state_observer._normalize(runtimerl._world.agents[101].state)[1:5])
      if done:
        print("State: {}\nReward: {}\nDone: {}, Info: {}\n"
              "=================================================".format(
                next_observed_state, reward, done, info))
        break
    # the agent must reach the goal within the specified number of steps
    self.assertEqual(done, True)
    # reaching the goal yields a reward of 1.
    self.assertEqual(reward, 1.)
    self.assertEqual(runtimerl._world.agents[100].id, 100)
    self.assertEqual(runtimerl._world.agents[101].id, 101)
  end_time = time.time()
  print("100 runs took {}s.".format(str(end_time - start_time)))
def __init__(self, params=ParameterServer(), eval_agent=None):
  GoalReached.__init__(self, params, eval_agent)
  self._next_goal_definition = -1
  self._last_distance = None
  self._goal_number = 0
def __init__(self, params=ParameterServer(), eval_agent=None):
  GoalReached.__init__(self, params, eval_agent)
def __init__(self, params=ParameterServer(), eval_agent=None):
  GoalReached.__init__(self, params, eval_agent)
  self._last_goal_id = -1
  self._reached_goal_in_last_step = False
  # TODO: read this reward from the config file
  self._intermediate_goal_reward = 1.0
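# A minimal sketch of how the TODO above could be resolved, assuming the
# ParameterServer's ["name", "description", default] accessor and a
# hypothetical parameter name "IntermediateGoalReward"; not part of the
# original implementation:
#
#   self._intermediate_goal_reward = \
#     params["Evaluator"]["IntermediateGoalReward",
#                         "reward granted when an intermediate goal is reached",
#                         1.0]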