def test_create_sac_agent(self):
  params = ParameterServer()
  map_interface = self.map_interface
  observer = NearestAgentsObserver()
  env = ExternalRuntime(
    map_interface=map_interface, observer=observer, params=params)
  env.ml_behavior = BehaviorContinuousML(params)
  sac_agent = BehaviorSACAgent(environment=env, params=params)
  env.ml_behavior = sac_agent
  self.assertTrue(isinstance(env.ml_behavior, BehaviorSACAgent))
def test_generate_ego_trajectory(self):
  params = ParameterServer()
  env = self.create_runtime_and_setup_empty_world(params)
  sac_agent = BehaviorSACAgent(environment=env, params=params)
  env.ml_behavior = sac_agent

  # add the ego agent
  state = np.array([0, 0, 0, 0, 0, 0])
  goal_line = Line2d(np.array([[0., 0.], [1., 1.]]))
  env.addEgoAgent(state, goal_line)

  N = 10
  state_traj, action_traj = env.generateTrajectory(0.2, N)
  env._viewer.drawTrajectory(state_traj)
  env.render()
  self.assertEqual(len(state_traj), N)
def generate_test_trajectory(self):
  """Generates a test trajectory from a SAC agent."""
  sac_agent = BehaviorSACAgent(environment=self.env, params=self.params)
  self.env.ml_behavior = sac_agent
  runner = SACRunnerGenerator(
    params=self.params, environment=self.env, agent=sac_agent)
  expert_trajectories = runner.GenerateExpertTrajectories(
    num_trajectories=1, render=False)

  # dump the first expert trajectory to a temporary directory
  dirname = "test_expert_trajectories"
  Path(dirname).mkdir(parents=True, exist_ok=True)
  filename = os.path.join(dirname, "test_expert_trajectory.jblb")
  joblib.dump(expert_trajectories[0], filename)
  return dirname
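# Minimal companion sketch (not part of the original tests): it shows how the
# trajectory dumped by generate_test_trajectory() could be loaded back and how
# the temporary directory could be removed afterwards. The helper name
# load_test_trajectory is an assumption; it only relies on joblib, os, and the
# standard-library shutil module (imported locally here for self-containment).
def load_test_trajectory(self, dirname="test_expert_trajectories"):
  """Loads the dumped expert trajectory and removes the temporary directory."""
  import shutil
  filename = os.path.join(dirname, "test_expert_trajectory.jblb")
  expert_trajectory = joblib.load(filename)
  shutil.rmtree(dirname, ignore_errors=True)
  return expert_trajectory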
def test_configurable_blueprint(self):
  params = ParameterServer(
    filename="bark_ml/tests/data/highway_merge_configurable.json")
  # continuous model
  ml_behavior = BehaviorContinuousML(params=params)
  bp = ConfigurableScenarioBlueprint(params=params, ml_behavior=ml_behavior)
  env = SingleAgentRuntime(blueprint=bp, render=False)

  # agent
  sac_agent = BehaviorSACAgent(environment=env, params=params)
  env.ml_behavior = sac_agent

  # test run
  env.reset()
  for _ in range(0, 5):
    action = np.random.randint(low=0, high=3)
    observed_next_state, reward, done, info = env.step(action)
def test_tracing_bark_world(self):
  params = ParameterServer()
  bp = ContinuousHighwayBlueprint(params)
  tracer = Tracer()
  env = SingleAgentRuntime(blueprint=bp, render=False)
  sac_agent = BehaviorSACAgent(environment=env, params=params)
  env.ml_behavior = sac_agent

  # NOTE: this also tests whether a BARK agent is self-contained
  env.ml_behavior.set_actions_externally = False
  env.reset()
  bark_world = env._world

  # trace the evaluation dict for 2 episodes with 5 steps each
  for j in range(0, 2):
    for i in range(0, 5):
      bark_world.Step(0.2)
      eval_dict = bark_world.Evaluate()
      tracer.Trace(eval_dict, num_episode=j)
  self.assertEqual(len(tracer._states), 10)
def test_behavior_wrapping(self):
  # create scenario
  params = ParameterServer()
  bp = ContinuousHighwayBlueprint(params, num_scenarios=10, random_seed=0)
  env = SingleAgentRuntime(blueprint=bp, render=False)

  ml_behaviors = []
  ml_behaviors.append(BehaviorPPOAgent(environment=env, params=params))
  ml_behaviors.append(BehaviorSACAgent(environment=env, params=params))

  for ml_behavior in ml_behaviors:
    # set agent
    env.ml_behavior = ml_behavior
    env.reset()
    done = False
    while done is False:
      action = np.random.uniform(low=-0.1, high=0.1, size=(2,))
      observed_next_state, reward, done, info = env.step(action)
      print(
        f"Observed state: {observed_next_state}, Reward: {reward}, Done: {done}")

    # action is set externally
    ml_behavior._set_action_externally = True
    agent_id = list(env._world.agents.keys())[0]
    observed_world = env._world.Observe([agent_id])[0]

    action = np.random.uniform(low=-0.1, high=0.1, size=(2,))
    ml_behavior.ActionToBehavior(action)
    a = ml_behavior.Plan(0.2, observed_world)

    action = np.random.uniform(low=-0.1, high=0.1, size=(2,))
    ml_behavior.ActionToBehavior(action)
    b = ml_behavior.Plan(0.2, observed_world)

    # different externally set actions should yield different plans
    self.assertEqual(np.any(np.not_equal(a, b)), True)

    # action will be calculated within the Plan(..) fct.
    a = ml_behavior.Plan(0.2, observed_world)
    b = ml_behavior.Plan(0.2, observed_world)
    np.testing.assert_array_equal(a, b)
def test_behavior_wrapping(self):
  # create scenario
  params = ParameterServer()
  bp = ContinuousHighwayBlueprint(params, number_of_senarios=10, random_seed=0)
  env = SingleAgentRuntime(blueprint=bp, render=True)

  ml_behaviors = []
  ml_behaviors.append(BehaviorPPOAgent(environment=env, params=params))
  ml_behaviors.append(BehaviorSACAgent(environment=env, params=params))

  for ml_behavior in ml_behaviors:
    # set agent
    env.ml_behavior = ml_behavior
    env.reset()
    done = False
    while done is False:
      action = np.random.uniform(low=-0.1, high=0.1, size=(2,))
      observed_next_state, reward, done, info = env.step(action)
      print(
        f"Observed state: {observed_next_state}, Reward: {reward}, Done: {done}")
def test_sac_agent(self):
  params = ParameterServer()
  env = gym.make("highway-v0")
  sac_agent = BehaviorSACAgent(environment=env, params=params)
  ppo_agent = BehaviorPPOAgent(environment=env, params=params)
  behaviors = [ppo_agent, sac_agent]

  for ml_agent in behaviors:
    env.ml_behavior = ml_agent
    env.reset()
    eval_id = env._scenario._eval_agent_ids[0]
    cloned_world = env._world.Copy()

    # step the original and the cloned world with the same behavior model
    self.assertEqual(env._world.agents[eval_id].behavior_model, ml_agent)
    for _ in range(0, 5):
      env._world.Step(0.2)
    self.assertEqual(cloned_world.agents[eval_id].behavior_model, ml_agent)
    for _ in range(0, 5):
      cloned_world.Step(0.2)

    for agent, cloned_agent in zip(env._world.agents.values(),
                                   cloned_world.agents.values()):
      # NOTE: should be the same as the mean is taken from the agents
      np.testing.assert_array_equal(agent.state, cloned_agent.state)
def test_agent_wrapping(self):
  params = ParameterServer()
  env = gym.make("highway-v0")
  env.reset()
  ppo_agent = BehaviorPPOAgent(environment=env, params=params)
  sac_agent = BehaviorSACAgent(environment=env, params=params)
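# Minimal follow-up sketch (not part of the original tests): once a wrapped
# agent is assigned to env.ml_behavior, the gym-style environment can be
# stepped as in the other tests above. The test name test_agent_wrapping_step
# is an assumption, and sampling via env.action_space.sample() assumes the
# "highway-v0" wrapper exposes a standard gym action space.
def test_agent_wrapping_step(self):
  params = ParameterServer()
  env = gym.make("highway-v0")
  sac_agent = BehaviorSACAgent(environment=env, params=params)
  env.ml_behavior = sac_agent
  env.reset()
  for _ in range(0, 5):
    action = env.action_space.sample()
    observed_next_state, reward, done, info = env.step(action)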