def test_agents(self):
    """Runs IQN, FQF and QRDQN agents for a minimal schedule.

    For each agent type it checks that actions are computed internally
    before `run()` and externally afterwards.
    """
    params = ParameterServer()
    # keep the schedule tiny so the test stays fast
    params["ML"]["BaseAgent"]["NumSteps"] = 2
    params["ML"]["BaseAgent"]["MaxEpisodeSteps"] = 2
    bp = DiscreteHighwayBlueprint(params, num_scenarios=10, random_seed=0)
    env = SingleAgentRuntime(blueprint=bp, render=False)
    # The original repeated the identical check verbatim for each agent
    # type; a loop over the agent classes removes the duplication.
    for agent_cls in (IQNAgent, FQFAgent, QRDQNAgent):
        agent = agent_cls(env=env, test_env=env, params=params)
        env.ml_behavior = agent
        # a freshly attached agent computes actions internally
        self.assertEqual(env.ml_behavior.set_action_externally, False)
        agent.run()
        # after training, the behavior is driven by external actions
        self.assertEqual(env.ml_behavior.set_action_externally, True)
def run_configuration(argv):
    """Train, visualize, or evaluate a SAC agent on the merging blueprint."""
    # params = ParameterServer(filename="examples/example_params/tfa_params.json")
    params = ParameterServer()
    # NOTE: set these paths to persist checkpoints and summaries
    # params["ML"]["BehaviorTFAAgents"]["CheckpointPath"] = "YOUR_PATH"
    # params["ML"]["TFARunner"]["SummaryPath"] = "YOUR_PATH"
    params["World"]["remove_agents_out_of_map"] = True

    # build the environment
    blueprint = ContinuousMergingBlueprint(
        params, number_of_senarios=2500, random_seed=0)
    env = SingleAgentRuntime(blueprint=blueprint, render=False)

    # SAC agent (a BehaviorPPOAgent/PPORunner pair can be swapped in here)
    agent = BehaviorSACAgent(environment=env, params=params)
    env.ml_behavior = agent
    runner = SACRunner(params=params, environment=env, agent=agent)

    mode = FLAGS.mode
    if mode == "train":
        runner.SetupSummaryWriter()
        runner.Train()
    elif mode == "visualize":
        runner.Run(num_episodes=10, render=True)
    elif mode == "evaluate":
        runner.Run(num_episodes=100, render=False)
def test_behavior_wrapping(self):
    """Wraps a discrete RL agent as a BARK behavior model and checks planning.

    Verifies that (a) the agent can step the gym-style environment, (b) two
    different externally set actions yield different planned trajectories, and
    (c) with internal action selection, planning on the same observed world is
    deterministic.
    """
    # create scenario
    params = ParameterServer()
    bp = DiscreteHighwayBlueprint(params, number_of_senarios=10, random_seed=0)
    env = SingleAgentRuntime(blueprint=bp, render=False)
    #env = gym.make("highway-v1", params=params)
    ml_behaviors = []
    # ml_behaviors.append(IQNAgent(env=env, test_env=env, params=params))
    ml_behaviors.append(FQFAgent(env=env, params=params))
    # ml_behaviors.append(QRDQNAgent(env=env, test_env=env, params=params))
    for ml_behavior in ml_behaviors:
        # set agent
        env.ml_behavior = ml_behavior
        env.reset()
        # one random discrete step through the gym interface
        action = np.random.randint(low=0, high=env.action_space.n)
        observed_next_state, reward, done, info = env.step(action)
        print(
            f"Observed state: {observed_next_state}, Reward: {reward}, Done: {done}"
        )
        # action is set externally
        ml_behavior._set_action_externally = True
        # observe the world from the perspective of the first agent
        agent_id = list(env._world.agents.keys())[0]
        observed_world = env._world.Observe([agent_id])[0]
        # do a random action and plan trajectory
        action = np.random.randint(low=1, high=env.action_space.n)
        ml_behavior.ActionToBehavior(action)
        a = ml_behavior.Plan(0.2, observed_world)
        # sample another different random action (resample until it differs)
        another_action = action
        while another_action == action:
            another_action = np.random.randint(low=1, high=env.action_space.n)
        # plan trajectory for the another action
        ml_behavior.ActionToBehavior(another_action)
        b = ml_behavior.Plan(0.2, observed_world)
        # the trajectory generated by two different actions should be different
        self.assertEqual(np.any(np.not_equal(a, b)), True)
        # action will be calculated within the Plan(..) fct.
        ml_behavior._set_action_externally = False
        a = ml_behavior.Plan(0.2, observed_world)
        b = ml_behavior.Plan(0.2, observed_world)
        # internally chosen action is exposed as a plain float
        last_action = ml_behavior.GetLastAction()
        self.assertTrue(isinstance(last_action, float))
        # same trajectory for same state
        np.testing.assert_array_equal(a, b)
def test_agent_and_runner(self):
    """Wires a PPO agent into the runtime and runs the visualizer."""
    params = ParameterServer()
    blueprint = ContinuousHighwayBlueprint(
        params, number_of_senarios=10, random_seed=0)
    env = SingleAgentRuntime(blueprint=blueprint, render=False)
    ppo_agent = BehaviorPPOAgent(environment=env, params=params)
    # install the agent as the environment's ML behavior
    env.ml_behavior = ppo_agent
    runner = PPORunner(params=params, environment=env, agent=ppo_agent)
    # runner.Train()
    runner.Visualize()
def run_configuration(argv):
    """Train, visualize, or evaluate a GNN-based SAC agent on the highway."""
    # Uncomment one of the following default parameter filename definitions,
    # depending on which GNN library you'd like to use.
    # tf2_gnn defaults:
    # param_filename = "examples/example_params/tfa_sac_gnn_tf2_gnn_default.json"
    # spektral defaults:
    param_filename = "examples/example_params/tfa_sac_gnn_spektral_default.json"
    params = ParameterServer(filename=param_filename)
    # NOTE: set these paths to persist checkpoints and summaries
    # params["ML"]["BehaviorTFAAgents"]["CheckpointPath"] = "YOUR_PATH"
    # params["ML"]["TFARunner"]["SummaryPath"] = "YOUR_PATH"
    #viewer = MPViewer(
    #  params=params,
    #  x_range=[-35, 35],
    #  y_range=[-35, 35],
    #  follow_agent_id=True)
    #viewer = VideoRenderer(
    #  renderer=viewer,
    #  world_step_time=0.2,
    #  fig_path="/your_path_here/training/video/")

    # environment with a graph observer feeding the GNN
    blueprint = ContinuousHighwayBlueprint(
        params, number_of_senarios=2500, random_seed=0)
    graph_observer = GraphObserver(params=params)
    env = SingleAgentRuntime(
        blueprint=blueprint, observer=graph_observer, render=False)

    agent = BehaviorGraphSACAgent(
        environment=env, observer=graph_observer, params=params)
    env.ml_behavior = agent
    runner = SACRunner(params=params, environment=env, agent=agent)

    mode = FLAGS.mode
    if mode == "train":
        runner.SetupSummaryWriter()
        runner.Train()
    elif mode == "visualize":
        runner.Visualize(5)
    elif mode == "evaluate":
        runner.Evaluate()
def test_sac_graph_agent(self):
    """Checks that the graph SAC agent becomes the eval agent's behavior model."""
    params = ParameterServer()
    blueprint = ContinuousMergingBlueprint(
        params, number_of_senarios=2500, random_seed=0)
    graph_observer = GraphObserver(params=params)
    env = SingleAgentRuntime(
        blueprint=blueprint, observer=graph_observer, render=False)
    agent = BehaviorGraphSACAgent(
        environment=env, observer=graph_observer, params=params)
    env.ml_behavior = agent
    env.reset()
    # after reset, the controlled agent must carry the SAC behavior
    controlled_id = env._scenario._eval_agent_ids[0]
    self.assertEqual(env._world.agents[controlled_id].behavior_model, agent)
    # the world must be steppable with the wrapped behavior in place
    for _ in range(5):
        env._world.Step(0.2)
def test_configurable_blueprint(self):
    """Builds a scenario from a JSON config and steps a SAC agent through it."""
    params = ParameterServer(
        filename="bark_ml/tests/data/highway_merge_configurable.json")
    # continuous ML behavior drives the configurable blueprint
    behavior = BehaviorContinuousML(params=params)
    blueprint = ConfigurableScenarioBlueprint(
        params=params, ml_behavior=behavior)
    env = SingleAgentRuntime(blueprint=blueprint, render=False)
    # agent
    agent = BehaviorSACAgent(environment=env, params=params)
    env.ml_behavior = agent
    # smoke-test a few environment steps with random actions
    env.reset()
    for _ in range(5):
        sampled_action = np.random.randint(low=0, high=3)
        observed_next_state, reward, done, info = env.step(sampled_action)
def test_tracing_bark_world(self):
    """Traces evaluator output of a self-contained BARK world over two episodes.

    The SAC behavior computes its own actions (no external action setting),
    the world is stepped directly, and every step's evaluation dict is traced.
    """
    params = ParameterServer()
    bp = ContinuousHighwayBlueprint(params)
    tracer = Tracer()
    env = SingleAgentRuntime(blueprint=bp, render=False)
    sac_agent = BehaviorSACAgent(environment=env, params=params)
    env.ml_behavior = sac_agent
    # NOTE: this also tests if a BARK agent is self-contained
    # BUGFIX: the original assigned `set_actions_externally` (plural), which
    # silently creates a new, unused attribute on the behavior; the flag used
    # elsewhere in this test suite is `_set_action_externally`.
    env.ml_behavior._set_action_externally = False
    env.reset()
    bark_world = env._world
    for j in range(0, 2):
        for i in range(0, 5):
            bark_world.Step(0.2)
            eval_dict = bark_world.Evaluate()
            tracer.Trace(eval_dict, num_episode=j)
    # 2 episodes x 5 steps -> 10 traced states
    self.assertEqual(len(tracer._states), 10)
def run_configuration(argv):
    """ Main """
    params = ParameterServer(
        filename="examples/example_params/tfa_generate_params.json")
    # params = ParameterServer()
    output_dir = params["GenerateExpertTrajectories"]["OutputDirectory"]

    # map the configured blueprint name onto its class
    blueprint_classes = {
        'merging': ContinuousMergingBlueprint,
        'highway': ContinuousHighwayBlueprint,
    }
    blueprint = params["World"]["Blueprint"]
    if blueprint not in blueprint_classes:
        raise ValueError(f'{blueprint} is no valid blueprint.')
    bp = blueprint_classes[blueprint](
        params, number_of_senarios=2500, random_seed=0)

    env = SingleAgentRuntime(blueprint=bp, render=False)
    sac_agent = BehaviorSACAgent(environment=env, params=params)
    env.ml_behavior = sac_agent
    runner = SACRunnerGenerator(params=params, environment=env, agent=sac_agent)

    if FLAGS.mode == "train":
        runner.SetupSummaryWriter()
        runner.Train()
    elif FLAGS.mode == "visualize":
        runner.Visualize(params["Visualization"]["NumberOfEpisodes"])
    elif FLAGS.mode == "generate":
        expert_trajectories = runner.GenerateExpertTrajectories(
            num_trajectories=params["GenerateExpertTrajectories"]
            ["NumberOfTrajectories"],
            render=params["World"]["render"])
        save_expert_trajectories(
            output_dir=output_dir, expert_trajectories=expert_trajectories)

    # store all used params of the training
    # params.Save(os.path.join(Path.home(), "examples/example_params/tfa_params.json"))
    sys.exit(0)
def test_agent_and_runner(self):
    """Trains a PPO agent briefly and checks the external-action flag flips."""
    params = ParameterServer()
    blueprint = ContinuousHighwayBlueprint(
        params, num_scenarios=10, random_seed=0)
    env = SingleAgentRuntime(blueprint=blueprint, render=False)
    ppo_agent = BehaviorPPOAgent(environment=env, params=params)
    # keep the schedule short so the test stays fast
    params["ML"]["PPORunner"]["NumberOfCollections"] = 2
    params["ML"]["SACRunner"]["NumberOfCollections"] = 2
    params["ML"]["TFARunner"]["EvaluationSteps"] = 2
    env.ml_behavior = ppo_agent
    # before training: actions are computed internally
    self.assertEqual(env.ml_behavior.set_action_externally, False)
    runner = PPORunner(params=params, environment=env, agent=ppo_agent)
    runner.Train()
    # training switches the behavior to externally supplied actions
    self.assertEqual(env.ml_behavior.set_action_externally, True)
    runner.Run()
    # evaluation keeps the external-action mode on
    self.assertEqual(env.ml_behavior.set_action_externally, True)
def run_configuration(argv):
    """Train, visualize, or evaluate a SAC agent on the merging blueprint
    with an attached matplotlib viewer and counterfactual heatmap output.
    """
    params = ParameterServer(
        filename="examples/example_params/tfa_params.json")
    # params = ParameterServer()
    # NOTE: Modify these paths in order to save the checkpoints and summaries
    # params["ML"]["BehaviorTFAAgents"]["CheckpointPath"] = "/Users/hart/Development/bark-ml/checkpoints_merging_nn/"
    # params["ML"]["TFARunner"]["SummaryPath"] = "/Users/hart/Development/bark-ml/checkpoints_merging_nn/"
    params["Visualization"]["Agents"]["Alpha"]["Other"] = 0.2
    # BUGFIX: this assignment was duplicated in the original; set once.
    params["Visualization"]["Agents"]["Alpha"]["Controlled"] = 0.2
    params["ML"]["VisualizeCfWorlds"] = False
    params["ML"]["VisualizeCfHeatmap"] = True
    params["World"]["remove_agents_out_of_map"] = False
    viewer = MPViewer(params=params,
                      x_range=[-35, 35],
                      y_range=[-35, 35],
                      follow_agent_id=True)
    # create environment
    bp = ContinuousMergingBlueprint(params, num_scenarios=10000, random_seed=0)
    env = SingleAgentRuntime(blueprint=bp, render=False, viewer=viewer)
    # SAC-agent (a BehaviorPPOAgent/PPORunner pair can be swapped in here)
    sac_agent = BehaviorSACAgent(environment=env, params=params)
    env.ml_behavior = sac_agent
    runner = SACRunner(params=params, environment=env, agent=sac_agent)
    if FLAGS.mode == "train":
        runner.SetupSummaryWriter()
        runner.Train()
    elif FLAGS.mode == "visualize":
        runner.Run(num_episodes=50, render=True)
    elif FLAGS.mode == "evaluate":
        runner.Run(num_episodes=100, render=False)
def test_behavior_wrapping(self):
    """Wraps continuous RL agents as BARK behavior models and checks planning.

    Verifies that (a) each agent can drive a full episode through the gym-style
    interface, (b) two different externally set continuous actions yield
    different planned trajectories, and (c) repeated planning on the same
    observed world is deterministic.
    """
    # create scenario
    params = ParameterServer()
    bp = ContinuousHighwayBlueprint(params, num_scenarios=10, random_seed=0)
    env = SingleAgentRuntime(blueprint=bp, render=False)
    ml_behaviors = []
    ml_behaviors.append(BehaviorPPOAgent(environment=env, params=params))
    ml_behaviors.append(BehaviorSACAgent(environment=env, params=params))
    for ml_behavior in ml_behaviors:
        # set agent
        env.ml_behavior = ml_behavior
        env.reset()
        done = False
        # run one full episode with small random continuous actions
        while done is False:
            action = np.random.uniform(low=-0.1, high=0.1, size=(2, ))
            observed_next_state, reward, done, info = env.step(action)
            print(
                f"Observed state: {observed_next_state}, Reward: {reward}, Done: {done}"
            )
        # action is set externally
        ml_behavior._set_action_externally = True
        agent_id = list(env._world.agents.keys())[0]
        observed_world = env._world.Observe([agent_id])[0]
        action = np.random.uniform(low=-0.1, high=0.1, size=(2, ))
        ml_behavior.ActionToBehavior(action)
        a = ml_behavior.Plan(0.2, observed_world)
        action = np.random.uniform(low=-0.1, high=0.1, size=(2, ))
        ml_behavior.ActionToBehavior(action)
        b = ml_behavior.Plan(0.2, observed_world)
        # NOTE(review): unlike the discrete variant, the second action is not
        # resampled until it differs — two independent uniform draws coincide
        # with probability ~0, so the trajectories are expected to differ.
        self.assertEqual(np.any(np.not_equal(a, b)), True)
        # action will be calculated within the Plan(..) fct.
        a = ml_behavior.Plan(0.2, observed_world)
        b = ml_behavior.Plan(0.2, observed_world)
        # same trajectory for same state
        np.testing.assert_array_equal(a, b)
def test_behavior_wrapping(self):
    """Drives PPO and SAC behaviors through one full episode each."""
    # create scenario
    params = ParameterServer()
    blueprint = ContinuousHighwayBlueprint(
        params, number_of_senarios=10, random_seed=0)
    env = SingleAgentRuntime(blueprint=blueprint, render=True)
    agents = [
        BehaviorPPOAgent(environment=env, params=params),
        BehaviorSACAgent(environment=env, params=params),
    ]
    for ml_behavior in agents:
        # install the agent and run until the episode terminates
        env.ml_behavior = ml_behavior
        env.reset()
        done = False
        while done is False:
            action = np.random.uniform(low=-0.1, high=0.1, size=(2, ))
            observed_next_state, reward, done, info = env.step(action)
            print(
                f"Observed state: {observed_next_state}, Reward: {reward}, Done: {done}"
            )
def run_configuration(argv):
    """Train, visualize, or evaluate a graph SAC agent using an
    interaction-network GNN on the merging blueprint.
    """
    params = ParameterServer()
    # NOTE: set these paths to persist checkpoints and summaries
    # params["ML"]["BehaviorTFAAgents"]["CheckpointPath"] = "/Users/hart/Development/bark-ml/checkpoints_merge_spektral_att2/"
    # params["ML"]["TFARunner"]["SummaryPath"] = "/Users/hart/Development/bark-ml/checkpoints_merge_spektral_att2/"
    #viewer = MPViewer(
    #  params=params,
    #  x_range=[-35, 35],
    #  y_range=[-35, 35],
    #  follow_agent_id=True)
    #viewer = VideoRenderer(
    #  renderer=viewer,
    #  world_step_time=0.2,
    #  fig_path="/your_path_here/training/video/")

    # environment with a graph observer feeding the GNN
    blueprint = ContinuousMergingBlueprint(
        params, num_scenarios=2500, random_seed=0)
    graph_observer = GraphObserver(params=params)
    env = SingleAgentRuntime(
        blueprint=blueprint, observer=graph_observer, render=False)

    agent = BehaviorGraphSACAgent(environment=env,
                                  observer=graph_observer,
                                  params=params,
                                  init_gnn='init_interaction_network')
    env.ml_behavior = agent
    runner = SACRunner(params=params, environment=env, agent=agent)

    mode = FLAGS.mode
    if mode == "train":
        runner.SetupSummaryWriter()
        runner.Train()
    elif mode == "visualize":
        runner.Run(num_episodes=10, render=True)
    elif mode == "evaluate":
        runner.Run(num_episodes=250, render=False)