def run_configuration(argv):
  """Trains, visualizes, or evaluates a graph-based SAC agent.

  The mode is taken from the absl flag FLAGS.mode ("train", "visualize"
  or "evaluate"). `argv` is supplied by absl's app.run and is unused here.
  """
  # Uncomment one of the following default parameter filename definitions,
  # depending on which GNN library you'd like to use.
  # File with standard parameters for tf2_gnn use:
  # param_filename = "examples/example_params/tfa_sac_gnn_tf2_gnn_default.json"
  # File with standard parameters for spektral use:
  param_filename = "examples/example_params/tfa_sac_gnn_spektral_default.json"
  params = ParameterServer(filename=param_filename)

  # NOTE: Modify these paths to specify your preferred path for checkpoints and summaries
  # params["ML"]["BehaviorTFAAgents"]["CheckpointPath"] = "YOUR_PATH"
  # params["ML"]["TFARunner"]["SummaryPath"] = "YOUR_PATH"

  #viewer = MPViewer(
  #  params=params,
  #  x_range=[-35, 35],
  #  y_range=[-35, 35],
  #  follow_agent_id=True)
  #viewer = VideoRenderer(
  #  renderer=viewer,
  #  world_step_time=0.2,
  #  fig_path="/your_path_here/training/video/")

  # create environment
  # NOTE(review): keyword is spelled "number_of_senarios" here while other
  # call sites use "num_scenarios" — confirm against the blueprint signature.
  bp = ContinuousHighwayBlueprint(params,
                                  number_of_senarios=2500,
                                  random_seed=0)
  observer = GraphObserver(params=params)
  env = SingleAgentRuntime(blueprint=bp, observer=observer, render=False)
  # the GNN-based SAC agent also acts as the environment's behavior model
  sac_agent = BehaviorGraphSACAgent(environment=env,
                                    observer=observer,
                                    params=params)
  env.ml_behavior = sac_agent
  runner = SACRunner(params=params, environment=env, agent=sac_agent)

  if FLAGS.mode == "train":
    runner.SetupSummaryWriter()
    runner.Train()
  elif FLAGS.mode == "visualize":
    runner.Visualize(5)
  elif FLAGS.mode == "evaluate":
    runner.Evaluate()
def configurable_setup(params, num_scenarios, graph_sac=True):
  """Builds an observer and the actor network of a SAC agent.

  Fixes the previous docstring, which documented neither `num_scenarios`
  nor `graph_sac`.

  Args:
    params: ParameterServer instance.
    num_scenarios: int, number of scenarios generated by the blueprint.
    graph_sac: bool, if True a graph-based SAC agent (GNN actor) is
      created; otherwise a standard (non-graph) SAC agent.

  Returns:
    observer: GraphObserver instance.
    actor: actor network of the created SAC agent.
  """
  observer = GraphObserver(params=params)
  bp = ContinuousHighwayBlueprint(params,
                                  number_of_senarios=num_scenarios,
                                  random_seed=0)
  env = SingleAgentRuntime(blueprint=bp, observer=observer, render=False)

  if graph_sac:
    # Get GNN SAC actor net
    sac_agent = BehaviorGraphSACAgent(environment=env,
                                      observer=observer,
                                      params=params)
  else:
    sac_agent = BehaviorSACAgent(environment=env, params=params)

  # NOTE(review): reaches into private members of the agent; a public
  # accessor on the agent would be preferable.
  actor = sac_agent._agent._actor_network
  return observer, actor
def test_gnn_parameters(self):
  """GNN parameters set via the ParameterServer reach actor and critic."""
  params = ParameterServer()
  gnn_library = GNNWrapper.SupportedLibrary.spektral
  gnn_settings = {
    "NumMpLayers": 4,
    "MpLayerNumUnits": 64,
    "message_calculation_class": "gnn_edge_mlp",
    "global_exchange_mode": "mean",
    "Library": gnn_library,
  }
  for key, value in gnn_settings.items():
    params["ML"]["BehaviorGraphSACAgent"]["GNN"][key] = value

  bp = ContinuousHighwayBlueprint(params,
                                  number_of_senarios=2500,
                                  random_seed=0)
  observer = GraphObserver(params=params)
  env = SingleAgentRuntime(blueprint=bp, observer=observer, render=False)
  sac_agent = BehaviorGraphSACAgent(environment=env,
                                    observer=observer,
                                    params=params)

  # both the actor and the first critic own a GNN configured identically
  networks = [sac_agent._agent._actor_network._gnn,
              sac_agent._agent._critic_network_1._gnn]
  for gnn in networks:
    for key, value in gnn_settings.items():
      self.assertEqual(gnn._params[key], value)
def test_sac_graph_agent(self):
  """The graph-SAC agent becomes the behavior model of the eval agent."""
  params = ParameterServer()
  bp = ContinuousMergingBlueprint(params,
                                  number_of_senarios=2500,
                                  random_seed=0)
  observer = GraphObserver(params=params)
  env = SingleAgentRuntime(blueprint=bp, observer=observer, render=False)
  sac_agent = BehaviorGraphSACAgent(environment=env,
                                    observer=observer,
                                    params=params)
  env.ml_behavior = sac_agent
  env.reset()

  eval_id = env._scenario._eval_agent_ids[0]
  self.assertEqual(env._world.agents[eval_id].behavior_model, sac_agent)

  # the world can be stepped with the agent in the loop
  num_steps, step_time = 5, 0.2
  for _ in range(num_steps):
    env._world.Step(step_time)
def __init__(self,
             blueprint=None,
             ml_behavior=None,
             observer=None,
             evaluator=None,
             step_time=None,
             viewer=None,
             scenario_generator=None,
             render=False,
             max_col_rate=0.1,
             behavior_model_pool=None,
             ego_rule_based=None,
             params=None):
  """Runtime that evaluates counterfactual worlds before stepping.

  Args:
    blueprint, ml_behavior, observer, evaluator, step_time, viewer,
      scenario_generator, render: forwarded to SingleAgentRuntime.
    max_col_rate: float. NOTE(review): this argument is currently unused;
      the effective value is read from params["ML"]["MaxColRate"].
    behavior_model_pool: list of behavior models spanning the
      counterfactual worlds (defaults to an empty list).
    ego_rule_based: fallback rule-based model for the ego agent;
      defaults to BehaviorIDMLaneTracking.
    params: ParameterServer; a fresh one is created if None.
  """
  SingleAgentRuntime.__init__(
    self,
    blueprint=blueprint,
    ml_behavior=ml_behavior,
    observer=observer,
    evaluator=evaluator,
    step_time=step_time,
    viewer=viewer,
    scenario_generator=scenario_generator,
    render=render)
  self._params = params or ParameterServer()
  # BUGFIX: read from self._params (not the possibly-None `params` argument),
  # so constructing the runtime without params no longer raises TypeError.
  self._max_col_rate = self._params["ML"][
    "MaxColRate",
    "Max. collision rate allowed over all counterfactual worlds.", 0.1]
  self._cf_simulation_steps = self._params["ML"][
    "CfSimSteps", "Simulation steps for the counterfactual worlds.", 5]
  self._visualize_cf_worlds = self._params["ML"][
    "VisualizeCfWorlds", "Whether the counterfactual worlds are visualized.",
    False]
  self._visualize_heatmap = self._params["ML"][
    "VisualizeCfHeatmap", "Whether the heatmap is being visualized.", False]
  # BUGFIX: description was copy-pasted from the heatmap flag.
  self._results_folder = self._params["ML"][
    "ResultsFolder", "Folder in which result files are stored.", "./"]
  self._logger = logging.getLogger()
  self._behavior_model_pool = behavior_model_pool or []
  self._ego_rule_based = ego_rule_based or BehaviorIDMLaneTracking(self._params)
  self._tracer = Tracer()
  if self._visualize_heatmap:
    # single axis reused for all heatmap drawings
    _, self._axs_heatmap = plt.subplots(1, 1, constrained_layout=True)
  self._count = 0
  self._cf_axs = {}
def test_tracing_bark_world(self):
  """Traces evaluation results of a self-contained BARK world."""
  params = ParameterServer()
  bp = ContinuousHighwayBlueprint(params)
  tracer = Tracer()
  env = SingleAgentRuntime(blueprint=bp, render=False)
  sac_agent = BehaviorSACAgent(environment=env, params=params)
  env.ml_behavior = sac_agent
  # NOTE: this also tests if a BARK agent is self-contained
  env.ml_behavior.set_actions_externally = False
  env.reset()
  bark_world = env._world

  num_episodes, steps_per_episode = 2, 5
  for episode in range(num_episodes):
    for _ in range(steps_per_episode):
      bark_world.Step(0.2)
      tracer.Trace(bark_world.Evaluate(), num_episode=episode)

  # two episodes of five steps each were traced
  self.assertEqual(len(tracer._states), 10)
def test_nearest_observer(self):
  """Observes a world with the NearestAgentsObserver and times the call."""
  params = ParameterServer()
  bp = ContinuousHighwayBlueprint(params)
  env = SingleAgentRuntime(blueprint=bp, render=True)
  env.reset()
  world = env._world

  # under test
  observer = NearestAgentsObserver(params)

  eval_id = env._scenario._eval_agent_ids[0]
  observed_world = world.Observe([eval_id])[0]
  start_time = time.time()
  observed_state = observer.Observe(observed_world)
  elapsed = time.time() - start_time
  print(f"It took {elapsed} seconds.")
  print(observed_state, observer.observation_space.shape)
def run_configuration(argv):
  """Trains, visualizes, or generates expert trajectories with a SAC agent.

  The mode is taken from FLAGS.mode ("train", "visualize", "generate");
  `argv` is supplied by absl's app.run and is unused here.
  """
  params = ParameterServer(
    filename="examples/example_params/tfa_generate_params.json")
  # params = ParameterServer()
  output_dir = params["GenerateExpertTrajectories"]["OutputDirectory"]

  # create environment; blueprint name comes from the params file
  blueprint = params["World"]["Blueprint"]
  if blueprint == 'merging':
    bp = ContinuousMergingBlueprint(params,
                                    number_of_senarios=2500,
                                    random_seed=0)
  elif blueprint == 'highway':
    bp = ContinuousHighwayBlueprint(params,
                                    number_of_senarios=2500,
                                    random_seed=0)
  else:
    raise ValueError(f'{blueprint} is no valid blueprint.')

  env = SingleAgentRuntime(blueprint=bp, render=False)
  sac_agent = BehaviorSACAgent(environment=env, params=params)
  env.ml_behavior = sac_agent
  runner = SACRunnerGenerator(params=params,
                              environment=env,
                              agent=sac_agent)

  if FLAGS.mode == "train":
    runner.SetupSummaryWriter()
    runner.Train()
  elif FLAGS.mode == "visualize":
    runner.Visualize(params["Visualization"]["NumberOfEpisodes"])
  elif FLAGS.mode == "generate":
    # roll out the trained agent and persist the trajectories to disk
    expert_trajectories = runner.GenerateExpertTrajectories(
      num_trajectories=params["GenerateExpertTrajectories"]
      ["NumberOfTrajectories"],
      render=params["World"]["render"])
    save_expert_trajectories(output_dir=output_dir,
                             expert_trajectories=expert_trajectories)

  # store all used params of the training
  # params.Save(os.path.join(Path.home(), "examples/example_params/tfa_params.json"))
  sys.exit(0)
def setUp(self):
  """Creates a merging environment from the test parameter file."""
  self.params = ParameterServer(
    filename="bark_ml/tests/py_library_tf2rl_tests/data/params.json")
  bp = ContinuousMergingBlueprint(self.params,
                                  number_of_senarios=10,
                                  random_seed=0)
  self.env = SingleAgentRuntime(blueprint=bp, render=False)
def test_agent_and_runner(self):
  """PPO agent trains and runs; actions are set externally afterwards."""
  params = ParameterServer()
  bp = ContinuousHighwayBlueprint(params, num_scenarios=10, random_seed=0)
  env = SingleAgentRuntime(blueprint=bp, render=False)
  agent = BehaviorPPOAgent(environment=env, params=params)

  # keep collection/evaluation effort minimal for the test
  for runner_key in ("PPORunner", "SACRunner"):
    params["ML"][runner_key]["NumberOfCollections"] = 2
  params["ML"]["TFARunner"]["EvaluationSteps"] = 2

  env.ml_behavior = agent
  self.assertEqual(env.ml_behavior.set_action_externally, False)

  runner = PPORunner(params=params, environment=env, agent=agent)
  runner.Train()
  self.assertEqual(env.ml_behavior.set_action_externally, True)
  runner.Run()
  self.assertEqual(env.ml_behavior.set_action_externally, True)
def run_configuration(argv):
  """Trains, visualizes, or evaluates a SAC agent on the merging blueprint.

  The mode is taken from FLAGS.mode; `argv` is supplied by absl and unused.
  """
  params = ParameterServer(
    filename="examples/example_params/tfa_params.json")
  # params = ParameterServer()
  # NOTE: Modify these paths in order to save the checkpoints and summaries
  # params["ML"]["BehaviorTFAAgents"]["CheckpointPath"] = "/Users/hart/Development/bark-ml/checkpoints_merging_nn/"
  # params["ML"]["TFARunner"]["SummaryPath"] = "/Users/hart/Development/bark-ml/checkpoints_merging_nn/"
  params["Visualization"]["Agents"]["Alpha"]["Other"] = 0.2
  # BUGFIX: this assignment was duplicated; setting it once suffices.
  params["Visualization"]["Agents"]["Alpha"]["Controlled"] = 0.2
  params["ML"]["VisualizeCfWorlds"] = False
  params["ML"]["VisualizeCfHeatmap"] = True
  params["World"]["remove_agents_out_of_map"] = False
  viewer = MPViewer(params=params,
                    x_range=[-35, 35],
                    y_range=[-35, 35],
                    follow_agent_id=True)

  # create environment
  bp = ContinuousMergingBlueprint(params,
                                  num_scenarios=10000,
                                  random_seed=0)
  env = SingleAgentRuntime(blueprint=bp, render=False, viewer=viewer)

  # PPO-agent
  # ppo_agent = BehaviorPPOAgent(environment=env,
  #                              params=params)
  # env.ml_behavior = ppo_agent
  # runner = PPORunner(params=params,
  #                    environment=env,
  #                    agent=ppo_agent)

  # SAC-agent
  sac_agent = BehaviorSACAgent(environment=env, params=params)
  env.ml_behavior = sac_agent
  runner = SACRunner(params=params,
                     environment=env,
                     agent=sac_agent)

  if FLAGS.mode == "train":
    runner.SetupSummaryWriter()
    runner.Train()
  elif FLAGS.mode == "visualize":
    runner.Run(num_episodes=50, render=True)
  elif FLAGS.mode == "evaluate":
    runner.Run(num_episodes=100, render=False)
def step(self, action): """perform the cf evaluation""" # simulate counterfactual worlds local_tracer = Tracer() eval_id = self._scenario._eval_agent_ids[0] self.St() cf_worlds = self.GenerateCounterfactualWorlds() for v in self._cf_axs.values(): v["count"] = 0 for i, cf_world in enumerate(cf_worlds): cf_key = list(cf_world.keys())[0] self.SimulateWorld( cf_world[cf_key], local_tracer, N=self._cf_simulation_steps, replaced_agent=cf_key, num_virtual_world=i) self.Et() # NOTE: this world would actually have the predicted traj. gt_world = self.ReplaceBehaviorModel() self.SimulateWorld( gt_world, local_tracer, N=self._cf_simulation_steps, replaced_agent="None", num_virtual_world="None") # NOTE: outsource hist = gt_world.agents[eval_id].history traj = np.stack([x[0] for x in hist]) # self._viewer.drawTrajectory(traj, color='blue') if self._visualize_heatmap: self.DrawHeatmap( local_tracer, filename=self._results_folder + "cf_%03d" % self._count + "_heatmap") # evaluate counterfactual worlds trace = self.TraceCounterfactualWorldStats(local_tracer) collision_rate = trace['collision']/len(self._behavior_model_pool) print(collision_rate) self._logger.info( f"The counterfactual worlds have a collision" + \ f"-rate of {collision_rate:.3f}.") # choose a policy executed_learned_policy = 1 if collision_rate > self._max_col_rate: executed_learned_policy = 0 self._logger.info( f"Executing fallback model.") self._world.agents[eval_id].behavior_model = self._ego_rule_based trace["executed_learned_policy"] = executed_learned_policy self._tracer.Trace(trace) self._count += 1 for fig in self._cf_axs.values(): for sub_ax in fig["ax"]: sub_ax.clear() return SingleAgentRuntime.step(self, action)
def test_tracer(self):
  """Traces env steps, queries aggregates, and resets the tracer."""
  params = ParameterServer()
  bp = ContinuousHighwayBlueprint(params)
  tracer = Tracer()
  env = SingleAgentRuntime(blueprint=bp, render=False)
  for episode in range(2):
    env.reset()
    for _ in range(10):
      action = np.random.uniform(low=-0.1, high=0.1, size=(2, ))
      tracer.Trace(env.step(action), num_episode=episode)

  # NOTE: test basic tracing (2 episodes x 10 steps)
  self.assertEqual(len(tracer._states), 20)
  expected_keys = ("is_terminal", "reward", "collision", "drivable_area",
                   "goal_reached", "step_count")
  for state in tracer._states:
    for key in expected_keys:
      self.assertIn(key, state.keys())

  # NOTE: test pandas magic
  tracer.ConvertToDf()
  # average collisions
  print(
    tracer.Query(key="collision", group_by="num_episode",
                 agg_type="MEAN").mean())
  # average reward
  print(
    tracer.Query(key="reward", group_by="num_episode",
                 agg_type="SUM").mean())

  # NOTE: test reset
  tracer.Reset()
  self.assertEqual(len(tracer._states), 0)
  self.assertEqual(tracer._df, None)
def test_agents(self):
  """Trains the FQF agent for a minimal number of steps.

  Checks that actions are computed internally before training and are set
  externally afterwards.
  """
  params = ParameterServer()
  # keep the training effort minimal for the test
  params["ML"]["BaseAgent"]["NumSteps"] = 2
  params["ML"]["BaseAgent"]["MaxEpisodeSteps"] = 2

  bp = DiscreteHighwayBlueprint(params,
                                number_of_senarios=10,
                                random_seed=0)
  env = SingleAgentRuntime(blueprint=bp, render=False)

  # IQN Agent
  # iqn_agent = IQNAgent(env=env, test_env=env, params=params)
  # env.ml_behavior = iqn_agent
  # self.assertEqual(env.ml_behavior.set_action_externally, False)
  # iqn_agent.run()
  # self.assertEqual(env.ml_behavior.set_action_externally, True)

  # FQF Agent
  fqf_agent = FQFAgent(env=env, params=params)
  env.ml_behavior = fqf_agent
  self.assertEqual(env.ml_behavior.set_action_externally, False)
  fqf_agent.train()
  self.assertEqual(env.ml_behavior.set_action_externally, True)
def run_configuration(argv): params = ParameterServer() # NOTE: Modify these paths to specify your preferred path for checkpoints and summaries # params["ML"]["BehaviorTFAAgents"]["CheckpointPath"] = "/Users/hart/Development/bark-ml/checkpoints_merge_spektral_att2/" # params["ML"]["TFARunner"]["SummaryPath"] = "/Users/hart/Development/bark-ml/checkpoints_merge_spektral_att2/" #viewer = MPViewer( # params=params, # x_range=[-35, 35], # y_range=[-35, 35], # follow_agent_id=True) #viewer = VideoRenderer( # renderer=viewer, # world_step_time=0.2, # fig_path="/your_path_here/training/video/") # create environment bp = ContinuousMergingBlueprint(params, num_scenarios=2500, random_seed=0) observer = GraphObserver(params=params) env = SingleAgentRuntime(blueprint=bp, observer=observer, render=False) sac_agent = BehaviorGraphSACAgent(environment=env, observer=observer, params=params, init_gnn='init_interaction_network') env.ml_behavior = sac_agent runner = SACRunner(params=params, environment=env, agent=sac_agent) if FLAGS.mode == "train": runner.SetupSummaryWriter() runner.Train() elif FLAGS.mode == "visualize": runner.Run(num_episodes=10, render=True) elif FLAGS.mode == "evaluate": runner.Run(num_episodes=250, render=False)
def test_general_evaluator(self):
  """Steps the env with a zero action using the GeneralEvaluator.

  BUGFIX: the step return was unpacked as (state, terminal, reward, info),
  swapping the names of the reward and the done flag relative to the
  (observation, reward, done, info) order used everywhere else in this
  project. The printed values are unchanged.
  """
  params = ParameterServer()
  bp = ContinuousSingleLaneBlueprint(params)
  env = SingleAgentRuntime(blueprint=bp, render=True)
  evaluator = GeneralEvaluator(params)
  env._evaluator = evaluator
  env.reset()
  for _ in range(0, 4):
    state, reward, terminal, info = env.step(np.array([0., 0.]))
    print(reward, terminal)
def test_behavior_wrapping(self):
  """Wraps a discrete (FQF) agent as a BARK behavior model.

  Checks stepping via the gym interface, that externally set actions
  change the planned trajectory, and that internally computed actions
  are deterministic for the same observed world.
  """
  # create scenario
  params = ParameterServer()
  bp = DiscreteHighwayBlueprint(params,
                                number_of_senarios=10,
                                random_seed=0)
  env = SingleAgentRuntime(blueprint=bp, render=False)
  #env = gym.make("highway-v1", params=params)
  ml_behaviors = []
  # ml_behaviors.append(IQNAgent(env=env, test_env=env, params=params))
  ml_behaviors.append(FQFAgent(env=env, params=params))
  # ml_behaviors.append(QRDQNAgent(env=env, test_env=env, params=params))

  for ml_behavior in ml_behaviors:
    # set agent
    env.ml_behavior = ml_behavior
    env.reset()
    action = np.random.randint(low=0, high=env.action_space.n)
    observed_next_state, reward, done, info = env.step(action)
    print(
      f"Observed state: {observed_next_state}, Reward: {reward}, Done: {done}"
    )

    # action is set externally
    ml_behavior._set_action_externally = True
    agent_id = list(env._world.agents.keys())[0]
    observed_world = env._world.Observe([agent_id])[0]

    # do a random action and plan trajectory
    action = np.random.randint(low=1, high=env.action_space.n)
    ml_behavior.ActionToBehavior(action)
    a = ml_behavior.Plan(0.2, observed_world)

    # sample another different random action
    # NOTE(review): assumes env.action_space.n > 2; otherwise this loop
    # cannot terminate since randint(1, 2) always yields 1.
    another_action = action
    while another_action == action:
      another_action = np.random.randint(low=1, high=env.action_space.n)

    # plan trajectory for the another action
    ml_behavior.ActionToBehavior(another_action)
    b = ml_behavior.Plan(0.2, observed_world)

    # the trajectory generated by two different actions should be different
    self.assertEqual(np.any(np.not_equal(a, b)), True)

    # action will be calculated within the Plan(..) fct.
    ml_behavior._set_action_externally = False
    a = ml_behavior.Plan(0.2, observed_world)
    b = ml_behavior.Plan(0.2, observed_world)
    last_action = ml_behavior.GetLastAction()
    self.assertTrue(isinstance(last_action, float))

    # same trajectory for same state
    np.testing.assert_array_equal(a, b)
def setUp(self):
  """Creates env, tf2rl wrapper, dummy expert data, GAIL agent and runner."""
  self.params = ParameterServer(
    filename=os.path.join(os.path.dirname(__file__),
                          "gail_data/params/gail_params_bark.json"))
  local_params = self.params["ML"]["GAILRunner"]["tf2rl"]

  # creating the dirs for logging if they are not present already:
  for key in ['logdir', 'model_dir', 'expert_path_dir']:
    # resolve paths relative to the user's home directory
    local_params[key] = os.path.join(Path.home(), local_params[key])
    if not os.path.exists(local_params[key]):
      os.makedirs(local_params[key])

  # create environment
  self.bp = ContinuousMergingBlueprint(self.params,
                                       number_of_senarios=500,
                                       random_seed=0)
  self.env = SingleAgentRuntime(blueprint=self.bp, render=False)

  # wrapped environment for compatibility with tf2rl
  self.wrapped_env = TF2RLWrapper(self.env)

  # Dummy expert trajectories:
  # 10 transitions with 16-dim observations and 2-dim actions
  self.expert_trajs = {
    'obses': np.zeros((10, 16)),
    'next_obses': np.ones((10, 16)),
    'acts': 2 * np.ones((10, 2))
  }

  # create agent and runner:
  self.agent = BehaviorGAILAgent(environment=self.wrapped_env,
                                 params=self.params)
  self.env.ml_behavior = self.agent
  self.runner = GAILRunner(environment=self.wrapped_env,
                           agent=self.agent,
                           params=self.params,
                           expert_trajs=self.expert_trajs)
def test_configurable_blueprint(self): params = ParameterServer( filename="bark_ml/tests/data/highway_merge_configurable.json") # continuous model ml_behavior = BehaviorContinuousML(params=params) bp = ConfigurableScenarioBlueprint(params=params, ml_behavior=ml_behavior) env = SingleAgentRuntime(blueprint=bp, render=False) # agent sac_agent = BehaviorSACAgent(environment=env, params=params) env.ml_behavior = sac_agent # test run env.reset() for _ in range(0, 5): action = np.random.randint(low=0, high=3) observed_next_state, reward, done, info = env.step(action)
def _configurable_setup(self, params_filename):
  """Builds params, observer, and GNN-SAC actor network from a params file.

  Args:
    params_filename: str, path to the params file.

  Returns:
    Tuple (params, observer, actor): the loaded ParameterServer, the
    GraphObserver, and the actor network of the BehaviorGraphSACAgent.
  """
  params = ParameterServer(filename=params_filename)
  observer = GraphObserver(params=params)
  blueprint = ContinuousHighwayBlueprint(
    params, number_of_senarios=2, random_seed=0)
  runtime = SingleAgentRuntime(
    blueprint=blueprint, observer=observer, render=False)
  # the GNN actor network is owned by the wrapped tf-agents SAC agent
  agent = BehaviorGraphSACAgent(
    environment=runtime, observer=observer, params=params)
  return params, observer, agent._agent._actor_network
def test_behavior_wrapping(self):
  """Runs PPO and SAC agents for one episode each via the gym interface."""
  # create scenario
  params = ParameterServer()
  bp = ContinuousHighwayBlueprint(params,
                                  number_of_senarios=10,
                                  random_seed=0)
  env = SingleAgentRuntime(blueprint=bp, render=True)
  agents = [BehaviorPPOAgent(environment=env, params=params),
            BehaviorSACAgent(environment=env, params=params)]
  for ml_behavior in agents:
    # install the agent as the environment's behavior model
    env.ml_behavior = ml_behavior
    env.reset()
    done = False
    while not done:
      action = np.random.uniform(low=-0.1, high=0.1, size=(2, ))
      observed_next_state, reward, done, info = env.step(action)
      print(
        f"Observed state: {observed_next_state}, Reward: {reward}, Done: {done}"
      )
def test_behavior_wrapping(self):
  """Runs PPO/SAC agents and checks planning with external actions.

  After one episode per agent: two different externally set actions must
  yield different planned trajectories, and planning twice for the same
  state must be deterministic.
  """
  # create scenario
  params = ParameterServer()
  bp = ContinuousHighwayBlueprint(params,
                                  num_scenarios=10,
                                  random_seed=0)
  env = SingleAgentRuntime(blueprint=bp, render=False)
  agents = [BehaviorPPOAgent(environment=env, params=params),
            BehaviorSACAgent(environment=env, params=params)]
  for ml_behavior in agents:
    # install the agent and roll out one episode
    env.ml_behavior = ml_behavior
    env.reset()
    done = False
    while not done:
      action = np.random.uniform(low=-0.1, high=0.1, size=(2, ))
      observed_next_state, reward, done, info = env.step(action)
      print(
        f"Observed state: {observed_next_state}, Reward: {reward}, Done: {done}"
      )

    # action is set externally: two different random actions
    # must produce two different planned trajectories
    ml_behavior._set_action_externally = True
    agent_id = list(env._world.agents.keys())[0]
    observed_world = env._world.Observe([agent_id])[0]
    plans = []
    for _ in range(2):
      action = np.random.uniform(low=-0.1, high=0.1, size=(2, ))
      ml_behavior.ActionToBehavior(action)
      plans.append(ml_behavior.Plan(0.2, observed_world))
    self.assertEqual(np.any(np.not_equal(plans[0], plans[1])), True)

    # action will be calculated within the Plan(..) fct.:
    # the same state must yield the same trajectory
    first = ml_behavior.Plan(0.2, observed_world)
    second = ml_behavior.Plan(0.2, observed_world)
    np.testing.assert_array_equal(first, second)
from bark_ml.observers.nearest_state_observer import NearestAgentsObserver from bark_ml.environments.blueprints import DiscreteMergingBlueprint # create scenario if not os.path.exists("examples"): logging.info("changing directory") os.chdir("diadem_dqn.runfiles/bark_ml") bark_params = ParameterServer( filename="examples/example_params/diadem_params.json") bp = DiscreteMergingBlueprint(bark_params, number_of_senarios=100, random_seed=0) observer = NearestAgentsObserver(bark_params) runtime = SingleAgentRuntime(blueprint=bp, observer=observer, render=True) def run_dqn_algorithm(parameter_files): exp_dir = "tmp_exp_dir" diadem_params = Params(filename=parameter_files) config_logging(console=True) environment = DiademBarkEnvironment(runtime=runtime) context = AgentContext(environment=environment, datamanager=None, preprocessor=None, optimizer=tf.train.AdamOptimizer, summary_service=PandasSummary()) agent = AgentManager(params=diadem_params, context=context) exp = Experiment(params=diadem_params['experiment'],
def __init__(self, *args, **kwargs):
  """Forwards all arguments unchanged to SingleAgentRuntime."""
  SingleAgentRuntime.__init__(self, *args, **kwargs)
def __init__(self, params=None, render=False):
  """Discrete-intersection single-agent runtime.

  Args:
    params: ParameterServer; a fresh one is created if None.
      BUGFIX: the default used to be a ParameterServer() evaluated once at
      definition time and shared by all instances (mutable default
      argument); it is now created per call.
    render: bool, whether the runtime renders each step.
  """
  params = params or ParameterServer()
  # renamed from "discrete_merging_bp": this is an intersection blueprint
  intersection_bp = DiscreteIntersectionBlueprint(params)
  SingleAgentRuntime.__init__(self,
                              blueprint=intersection_bp,
                              render=render)
def __init__(self):
  """Continuous-intersection single-agent runtime with rendering enabled."""
  params = ParameterServer()
  # renamed from "cont_merging_bp": this is an intersection blueprint
  intersection_bp = ContinuousIntersectionBlueprint(params)
  SingleAgentRuntime.__init__(self,
                              blueprint=intersection_bp,
                              render=True)
def __init__(self, params=None, render=False):
  """Discrete-highway single-agent runtime.

  Args:
    params: ParameterServer; a fresh one is created if None.
      BUGFIX: the default used to be a ParameterServer() evaluated once at
      definition time and shared by all instances (mutable default
      argument); it is now created per call.
    render: bool, whether the runtime renders each step.
  """
  params = params or ParameterServer()
  discrete_highway_bp = DiscreteHighwayBlueprint(params)
  SingleAgentRuntime.__init__(self,
                              blueprint=discrete_highway_bp,
                              render=render)
class PyGraphObserverTests(unittest.TestCase):
  """Tests for the GraphObserver producing graph-structured observations."""

  def _get_observation(self, observer, world, eval_id):
    # Observes `world` from the perspective of the agent `eval_id`.
    observed_world = world.Observe([eval_id])[0]
    observation = observer.Observe(observed_world)
    return observation, observed_world

  def setUp(self):
    """Setting up the test-case."""
    params = ParameterServer()
    bp = ContinuousHighwayBlueprint(params, random_seed=0)
    self.env = SingleAgentRuntime(blueprint=bp, render=False)
    self.env.reset()
    self.world = self.env._world
    self.observer = GraphObserver(params)
    self.eval_id = self.env._scenario._eval_agent_ids[0]

  def test_parameter_server_usage(self):
    # values handed in via the ParameterServer must reach the observer
    expected_num_agents = 15
    expected_visibility_radius = 100

    params = ParameterServer()
    params["ML"]["GraphObserver"]["AgentLimit"] = expected_num_agents
    params["ML"]["GraphObserver"]["VisibilityRadius"] = expected_visibility_radius
    params["ML"]["GraphObserver"]["NormalizationEnabled"] = True
    observer = GraphObserver(params=params)

    self.assertEqual(observer._num_agents, expected_num_agents)
    self.assertEqual(observer._visibility_radius, expected_visibility_radius)
    # self.assertTrue(observer._add_self_loops)
    self.assertTrue(observer._normalize_observations)

  def test_request_subset_of_available_node_features(self):
    # a valid subset of node features is honored as-is
    params = ParameterServer()
    requested_features = GraphObserver.available_node_attributes()[0:5]
    params["ML"]["GraphObserver"]["EnabledNodeFeatures"] = requested_features
    observer = GraphObserver(params=params)

    self.assertEqual(
      observer._enabled_node_attribute_keys,
      requested_features)

  def test_request_subset_of_available_edge_features(self):
    # a valid subset of edge features is honored as-is
    params = ParameterServer()
    requested_features = GraphObserver.available_edge_attributes()[0:2]
    params["ML"]["GraphObserver"]["EnabledEdgeFeatures"] = requested_features
    observer = GraphObserver(params=params)

    self.assertEqual(
      observer._enabled_edge_attribute_keys,
      requested_features)

  def test_request_partially_invalid_node_features(self):
    # unknown node feature names are dropped by the observer
    params = ParameterServer()
    requested_features =\
      GraphObserver.available_node_attributes()[0:5] + ['invalid']
    params["ML"]["GraphObserver"]["EnabledNodeFeatures"] = requested_features
    observer = GraphObserver(params=params)

    # remove invalid feature from expected list
    requested_features.pop(-1)

    self.assertEqual(
      observer._enabled_node_attribute_keys,
      requested_features)

  def test_request_partially_invalid_edge_features(self):
    # unknown edge feature names are dropped by the observer
    params = ParameterServer()
    requested_features =\
      GraphObserver.available_edge_attributes()[0:2] + ['invalid']
    params["ML"]["GraphObserver"]["EnabledEdgeFeatures"] = requested_features
    observer = GraphObserver(params=params)

    # remove invalid feature from expected list
    requested_features.pop(-1)

    self.assertEqual(
      observer._enabled_edge_attribute_keys,
      requested_features)

  def test_observe_with_self_loops(self):
    num_agents = 4
    params = ParameterServer()
    params["ML"]["GraphObserver"]["AgentLimit"] = num_agents
    params["ML"]["GraphObserver"]["SelfLoops"] = True
    observer = GraphObserver(params=params)
    obs, _ = self._get_observation(observer, self.world, self.eval_id)
    obs = tf.expand_dims(obs, 0)  # add a batch dimension
    _, adjacency, _ = GraphObserver.graph(obs, graph_dims=observer.graph_dimensions)

    adjacency_list_diagonal = (tf.linalg.tensor_diag_part(adjacency[0]))
    # assert ones on the diagonal of the adjacency matrix
    tf.assert_equal(adjacency_list_diagonal, tf.ones(num_agents))

  def test_observe_without_self_loops(self):
    num_agents = 4
    params = ParameterServer()
    params["ML"]["GraphObserver"]["AgentLimit"] = num_agents
    params["ML"]["GraphObserver"]["SelfLoops"] = False
    observer = GraphObserver(params=params)
    obs, _ = self._get_observation(observer, self.world, self.eval_id)
    obs = tf.expand_dims(obs, 0)  # add a batch dimension
    _, adjacency, _ = GraphObserver.graph(obs, graph_dims=observer.graph_dimensions)

    adjacency_list_diagonal = (tf.linalg.tensor_diag_part(adjacency[0]))
    # assert zeros on the diagonal of the adjacency matrix
    tf.assert_equal(adjacency_list_diagonal, tf.zeros(num_agents))

  def test_observation_conforms_to_spec(self):
    """
    Verify that the observation returned by the observer
    is valid with respect to its defined observation space.
    """
    num_agents = 4
    params = ParameterServer()
    params["ML"]["GraphObserver"]["AgentLimit"] = num_agents
    observer = GraphObserver(params=params)
    obs, _ = self._get_observation(observer, self.world, self.eval_id)
    self.assertTrue(observer.observation_space.contains(obs))

    # additionally check that the adjacency list is binary, since
    # this can't be enforced by the observation space currently
    adj_start_idx = num_agents * observer.feature_len
    adj_end_idx = adj_start_idx + num_agents ** 2
    adj_list = obs[adj_start_idx : adj_end_idx]

    for element in adj_list:
      self.assertIn(element, [0, 1])

  def test_observed_agents_selection(self):
    agent_limit = 10
    params = ParameterServer()
    params["ML"]["GraphObserver"]["AgentLimit"] = agent_limit
    observer = GraphObserver(params=params)
    obs, obs_world = self._get_observation(
      observer=observer, world=self.world, eval_id=self.eval_id)
    obs = tf.expand_dims(obs, 0)  # add a batch dimension
    nodes, _, _ = GraphObserver.graph(obs, graph_dims=observer.graph_dimensions)
    nodes = nodes[0]  # remove batch dim

    ego_node = nodes[0]
    ego_node_pos = Point2d(
      ego_node[0].numpy(),  # x coordinate
      ego_node[1].numpy())  # y coordinate

    # verify that the nodes are ordered by
    # ascending distance to the ego node
    max_distance_to_ego = 0
    for node in nodes:
      pos = Point2d(
        node[0].numpy(),  # x coordinate
        node[1].numpy())  # y coordinate
      distance_to_ego = Distance(pos, ego_node_pos)

      self.assertGreaterEqual(distance_to_ego, max_distance_to_ego,
                              msg='Nodes are not sorted by distance relative to '\
                                + 'the ego node in ascending order.')

      max_distance_to_ego = distance_to_ego

  def test_observation_to_graph_conversion(self):
    params = ParameterServer()
    params["ML"]["GraphObserver"]["SelfLoops"] = False
    graph_observer = GraphObserver(params=params)

    num_nodes = 5
    num_features = 5
    num_edge_features = 4
    node_features = np.random.random_sample((num_nodes, num_features))
    edge_features = np.random.random_sample((num_nodes, num_nodes, num_edge_features))

    # note that edges are bidirectional, the
    # the matrix is symmetric
    adjacency_list = [
      [0, 1, 1, 1, 0],  # 1 connects with 2, 3, 4
      [1, 0, 1, 1, 0],  # 2 connects with 3, 4
      [1, 1, 0, 1, 0],  # 3 connects with 4
      [1, 1, 1, 0, 0],  # 4 has no links
      [0, 0, 0, 0, 0]   # empty slot -> all zeros
    ]

    # flatten [node features | adjacency | edge features] into one vector
    observation = np.array(node_features)
    observation = np.append(observation, adjacency_list)
    observation = np.append(observation, edge_features)
    observation = observation.reshape(-1)
    observations = np.array([observation, observation])

    self.assertEqual(observations.shape, (2, 150))

    expected_nodes = tf.constant([node_features, node_features])
    expected_edge_features = tf.constant([edge_features, edge_features])

    graph_dims = (num_nodes, num_features, num_edge_features)
    # NOTE: rebinds `edge_features` to the tensor returned by the observer;
    # the expected tensors were built from the numpy input above.
    nodes, edges, edge_features = graph_observer.graph(observations, graph_dims)

    self.assertTrue(tf.reduce_all(tf.equal(nodes, expected_nodes)))
    self.assertTrue(tf.reduce_all(tf.equal(edge_features, expected_edge_features)))

    observations = np.array([observation, observation, observation])

    # in dense mode, the nodes of all graphs are in a single list
    expected_nodes = tf.constant([node_features, node_features, node_features])
    expected_nodes = tf.reshape(expected_nodes, [-1, num_features])

    # the edges encoded in the adjacency list above
    expected_dense_edges = tf.constant([
      # graph 1
      [0, 1], [0, 2], [0, 3], [1, 0], [1, 2], [1, 3],
      [2, 0], [2, 1], [2, 3], [3, 0], [3, 1], [3, 2],
      # graph 2
      [5, 6], [5, 7], [5, 8], [6, 5], [6, 7], [6, 8],
      [7, 5], [7, 6], [7, 8], [8, 5], [8, 6], [8, 7],
      # graph 3
      [10, 11], [10, 12], [10, 13], [11, 10], [11, 12], [11, 13],
      [12, 10], [12, 11], [12, 13], [13, 10], [13, 11], [13, 12]
    ], dtype=tf.int32)

    expected_node_to_graph_map = tf.constant([
      0, 0, 0, 0, 0,
      1, 1, 1, 1, 1,
      2, 2, 2, 2, 2
    ])

    observations = tf.convert_to_tensor(observations)
    print(observations)
    nodes, edges, node_to_graph_map, E =\
      GraphObserver.graph(observations, graph_dims, dense=True)

    self.assertTrue(tf.reduce_all(tf.equal(nodes, expected_nodes)))
    self.assertTrue(tf.reduce_all(tf.equal(edges, expected_dense_edges)))
    # self.assertTrue(tf.reduce_all(
    #   tf.equal(node_to_graph_map, expected_node_to_graph_map)))

  def test_agent_pruning(self):
    """
    Verify that the observer correctly handles the case
    where there are less agents in the world than set as
    the limit.

    tl;dr: check that all entries of the node features,
    adjacency matrix, and edge features not corresponding
    to actually existing agents are zeros.
    """
    num_agents = 25
    params = ParameterServer()
    params["ML"]["GraphObserver"]["AgentLimit"] = num_agents
    observer = GraphObserver(params=params)
    obs, world = self._get_observation(observer, self.world, self.eval_id)
    obs = tf.expand_dims(obs, 0)  # add a batch dimension

    nodes, adjacency_matrix, edge_features = GraphObserver.graph(
      observations=obs,
      graph_dims=observer.graph_dimensions)

    self.assertEqual(nodes.shape, [1, num_agents, observer.feature_len])

    expected_num_agents = len(world.agents)

    # nodes that do not represent agents, but are contained
    # to fill up the required observation space.
    expected_n_fill_up_nodes = num_agents - expected_num_agents
    fill_up_nodes = nodes[0, expected_num_agents:]

    self.assertEqual(
      fill_up_nodes.shape,
      [expected_n_fill_up_nodes, observer.feature_len])

    # verify that entries for non-existing agents are all zeros
    self.assertEqual(tf.reduce_sum(fill_up_nodes), 0)

    # the equivalent for edges: verify that for each zero entry
    # in the adjacency matrix, the corresponding edge feature
    # vector is a zero vector of correct length.
    zero_indices = tf.where(tf.equal(adjacency_matrix, 0))
    fill_up_edge_features = tf.gather_nd(edge_features, zero_indices)
    edge_feature_len = observer.graph_dimensions[2]
    zero_edge_feature_vectors = tf.zeros(
      [zero_indices.shape[0], edge_feature_len])

    self.assertTrue(tf.reduce_all(tf.equal(
      fill_up_edge_features, zero_edge_feature_vectors)))
def __init__(self):
  """Continuous-highway single-agent runtime with rendering enabled."""
  blueprint = ContinuousHighwayBlueprint(ParameterServer())
  SingleAgentRuntime.__init__(self,
                              blueprint=blueprint,
                              render=True)
# bp = DiscreteHighwayBlueprint(params, # number_of_senarios=10, # random_seed=0) # arguments that are additionally set in the runtime # overwrite the ones of the blueprint # e.g. we can change observer to the cpp observer observer = NearestObserver(params) # viewer = MPViewer(params=params, # x_range=[-35, 35], # y_range=[-35, 35], # follow_agent_id=True) # viewer = VideoRenderer(renderer=viewer, # world_step_time=0.2, # fig_path="/Users/hart/2020/bark-ml/video/") env = SingleAgentRuntime(blueprint=bp, observer=observer, render=True) # gym interface env.reset() done = False while done is False: action = np.random.uniform( low=np.array([-0.5, -0.02]), high=np.array([0.5, 0.02]), size=(2, )) observed_next_state, reward, done, info = env.step(action) print(f"Observed state: {observed_next_state}, Action: {action}, Reward: {reward}, Done: {done}") # viewer.export_video( # filename="/Users/hart/2020/bark-ml/video/video", remove_image_dir=False)