def test_agents(self):
    params = ParameterServer()
    # keep training short for the test
    params["ML"]["BaseAgent"]["NumSteps"] = 2
    params["ML"]["BaseAgent"]["MaxEpisodeSteps"] = 2

    bp = DiscreteHighwayBlueprint(params, num_scenarios=10, random_seed=0)
    env = SingleAgentRuntime(blueprint=bp, render=False)

    # IQN Agent
    iqn_agent = IQNAgent(env=env, test_env=env, params=params)
    env.ml_behavior = iqn_agent
    self.assertEqual(env.ml_behavior.set_action_externally, False)
    iqn_agent.run()
    self.assertEqual(env.ml_behavior.set_action_externally, True)

    # FQF Agent
    fqf_agent = FQFAgent(env=env, test_env=env, params=params)
    env.ml_behavior = fqf_agent
    self.assertEqual(env.ml_behavior.set_action_externally, False)
    fqf_agent.run()
    self.assertEqual(env.ml_behavior.set_action_externally, True)

    # QRDQN Agent
    qrdqn_agent = QRDQNAgent(env=env, test_env=env, params=params)
    env.ml_behavior = qrdqn_agent
    self.assertEqual(env.ml_behavior.set_action_externally, False)
    qrdqn_agent.run()
    self.assertEqual(env.ml_behavior.set_action_externally, True)
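The snippet above is a test method and omits its imports. A minimal import block, assuming the usual bark-ml layout (the module paths are assumptions and may differ between bark-ml versions):

from bark.runtime.commons.parameters import ParameterServer
from bark_ml.environments.blueprints import DiscreteHighwayBlueprint
from bark_ml.environments.single_agent_runtime import SingleAgentRuntime
# assumed location of the distributional-RL agents (FQF/IQN/QRDQN wrapper)
from bark_ml.library_wrappers.lib_fqf_iqn_qrdqn.agent import (
    FQFAgent, IQNAgent, QRDQNAgent)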
Example #2
def run_configuration(argv):
    # params = ParameterServer(filename="examples/example_params/tfa_params.json")
    params = ParameterServer()
    # NOTE: Modify these paths in order to save the checkpoints and summaries
    # params["ML"]["BehaviorTFAAgents"]["CheckpointPath"] = "YOUR_PATH"
    # params["ML"]["TFARunner"]["SummaryPath"] = "YOUR_PATH"
    params["World"]["remove_agents_out_of_map"] = True

    # create environment
    # note: "number_of_senarios" (sic) is the keyword this bark-ml version
    # expects; other examples below use the newer "num_scenarios" spelling
    bp = ContinuousMergingBlueprint(params,
                                    number_of_senarios=2500,
                                    random_seed=0)
    env = SingleAgentRuntime(blueprint=bp, render=False)

    # PPO-agent
    # ppo_agent = BehaviorPPOAgent(environment=env,
    #                              params=params)
    # env.ml_behavior = ppo_agent
    # runner = PPORunner(params=params,
    #                    environment=env,
    #                    agent=ppo_agent)

    # SAC-agent
    sac_agent = BehaviorSACAgent(environment=env, params=params)
    env.ml_behavior = sac_agent
    runner = SACRunner(params=params, environment=env, agent=sac_agent)
    if FLAGS.mode == "train":
        runner.SetupSummaryWriter()
        runner.Train()
    elif FLAGS.mode == "visualize":
        runner.Run(num_episodes=10, render=True)
    elif FLAGS.mode == "evaluate":
        runner.Run(num_episodes=100, render=False)
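run_configuration receives argv and branches on FLAGS.mode, which points to the usual absl entry point around these example scripts. A minimal sketch of that boilerplate, assuming a flag named "mode" with the three values used above (the exact flag definition is not shown in the excerpt and is an assumption):

from absl import app
from absl import flags

FLAGS = flags.FLAGS
flags.DEFINE_enum("mode", "visualize",
                  ["train", "visualize", "evaluate"],
                  "Mode in which the configuration should be executed.")

if __name__ == "__main__":
    app.run(run_configuration)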
Example #3
    def test_behavior_wrapping(self):
        # create scenario
        params = ParameterServer()
        bp = DiscreteHighwayBlueprint(params,
                                      number_of_senarios=10,
                                      random_seed=0)
        env = SingleAgentRuntime(blueprint=bp, render=False)
        #env = gym.make("highway-v1", params=params)
        ml_behaviors = []
        # ml_behaviors.append(IQNAgent(env=env, test_env=env, params=params))
        ml_behaviors.append(FQFAgent(env=env, params=params))
        # ml_behaviors.append(QRDQNAgent(env=env, test_env=env, params=params))

        for ml_behavior in ml_behaviors:
            # set agent
            env.ml_behavior = ml_behavior
            env.reset()
            action = np.random.randint(low=0, high=env.action_space.n)
            observed_next_state, reward, done, info = env.step(action)
            print(
                f"Observed state: {observed_next_state}, Reward: {reward}, Done: {done}"
            )

            # action is set externally
            ml_behavior._set_action_externally = True
            agent_id = list(env._world.agents.keys())[0]
            observed_world = env._world.Observe([agent_id])[0]

            # do a random action and plan trajectory
            action = np.random.randint(low=1, high=env.action_space.n)
            ml_behavior.ActionToBehavior(action)
            a = ml_behavior.Plan(0.2, observed_world)

            # sample a different random action
            another_action = action
            while another_action == action:
                another_action = np.random.randint(low=1,
                                                   high=env.action_space.n)

            # plan a trajectory for the other action
            ml_behavior.ActionToBehavior(another_action)
            b = ml_behavior.Plan(0.2, observed_world)

            # the trajectories generated by two different actions should be different
            self.assertEqual(np.any(np.not_equal(a, b)), True)

            # action will be calculated within the Plan(..) fct.
            ml_behavior._set_action_externally = False
            a = ml_behavior.Plan(0.2, observed_world)
            b = ml_behavior.Plan(0.2, observed_world)
            last_action = ml_behavior.GetLastAction()
            self.assertTrue(isinstance(last_action, float))

            # same trajectory for same state
            np.testing.assert_array_equal(a, b)
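The assert calls mark these snippets as unittest.TestCase methods. A minimal harness for running such a snippet standalone could look like this (the class name is hypothetical):

import unittest

class PyBehaviorTests(unittest.TestCase):  # hypothetical class name
    # paste test_behavior_wrapping (or any test method above) here
    pass

if __name__ == '__main__':
    unittest.main()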
Example #4
    def test_agent_and_runner(self):
        params = ParameterServer()
        bp = ContinuousHighwayBlueprint(params,
                                        number_of_senarios=10,
                                        random_seed=0)
        env = SingleAgentRuntime(blueprint=bp, render=False)
        agent = BehaviorPPOAgent(environment=env, params=params)

        # set agent
        env.ml_behavior = agent
        runner = PPORunner(params=params, environment=env, agent=agent)
        # runner.Train()
        runner.Visualize()
Example #5
def run_configuration(argv):
    # Uncomment one of the following default parameter filename definitions,
    # depending on which GNN library you'd like to use.

    # File with standard parameters for tf2_gnn use:
    # param_filename = "examples/example_params/tfa_sac_gnn_tf2_gnn_default.json"

    # File with standard parameters for spektral use:
    param_filename = "examples/example_params/tfa_sac_gnn_spektral_default.json"
    params = ParameterServer(filename=param_filename)

    # NOTE: Modify these paths to specify your preferred path for checkpoints and summaries
    # params["ML"]["BehaviorTFAAgents"]["CheckpointPath"] = "YOUR_PATH"
    # params["ML"]["TFARunner"]["SummaryPath"] = "YOUR_PATH"

    #viewer = MPViewer(
    #  params=params,
    #  x_range=[-35, 35],
    #  y_range=[-35, 35],
    #  follow_agent_id=True)

    #viewer = VideoRenderer(
    #  renderer=viewer,
    #  world_step_time=0.2,
    #  fig_path="/your_path_here/training/video/")

    # create environment
    bp = ContinuousHighwayBlueprint(params,
                                    number_of_senarios=2500,
                                    random_seed=0)

    observer = GraphObserver(params=params)

    env = SingleAgentRuntime(blueprint=bp, observer=observer, render=False)

    sac_agent = BehaviorGraphSACAgent(environment=env,
                                      observer=observer,
                                      params=params)
    env.ml_behavior = sac_agent
    runner = SACRunner(params=params, environment=env, agent=sac_agent)

    if FLAGS.mode == "train":
        runner.SetupSummaryWriter()
        runner.Train()
    elif FLAGS.mode == "visualize":
        runner.Visualize(5)
    elif FLAGS.mode == "evaluate":
        runner.Evaluate()
Example #6
 def test_sac_graph_agent(self):
     params = ParameterServer()
     bp = ContinuousMergingBlueprint(params,
                                     number_of_senarios=2500,
                                     random_seed=0)
     observer = GraphObserver(params=params)
     env = SingleAgentRuntime(blueprint=bp, observer=observer, render=False)
     sac_agent = BehaviorGraphSACAgent(environment=env,
                                       observer=observer,
                                       params=params)
     env.ml_behavior = sac_agent
     env.reset()
     # the eval agent's behavior model should now be the SAC agent
     eval_id = env._scenario._eval_agent_ids[0]
     self.assertEqual(env._world.agents[eval_id].behavior_model, sac_agent)
     # step the world a few times with the agent in the loop
     for _ in range(0, 5):
         env._world.Step(0.2)
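The graph-based examples additionally need the observer and GNN agent classes. Presumed imports (the paths follow the bark-ml layout but are assumptions and may vary across versions):

from bark_ml.observers.graph_observer import GraphObserver
from bark_ml.library_wrappers.lib_tf_agents.agents import BehaviorGraphSACAgent
from bark_ml.library_wrappers.lib_tf_agents.runners import SACRunner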
Example #7
 def test_configurable_blueprint(self):
     params = ParameterServer(
         filename="bark_ml/tests/data/highway_merge_configurable.json")
     # continuous model
     ml_behavior = BehaviorContinuousML(params=params)
     bp = ConfigurableScenarioBlueprint(params=params,
                                        ml_behavior=ml_behavior)
     env = SingleAgentRuntime(blueprint=bp, render=False)
     # agent
     sac_agent = BehaviorSACAgent(environment=env, params=params)
     env.ml_behavior = sac_agent
     # test run
     env.reset()
     for _ in range(0, 5):
         # the behavior model is continuous, so sample a continuous action
         action = np.random.uniform(low=-0.1, high=0.1, size=(2, ))
         observed_next_state, reward, done, info = env.step(action)
Example #8
 def test_tracing_bark_world(self):
     params = ParameterServer()
     bp = ContinuousHighwayBlueprint(params)
     tracer = Tracer()
     env = SingleAgentRuntime(blueprint=bp, render=False)
     sac_agent = BehaviorSACAgent(environment=env, params=params)
     env.ml_behavior = sac_agent
     # NOTE: this also tests if a BARK agent is self-contained
     env.ml_behavior.set_action_externally = False
     env.reset()
     bark_world = env._world
     for j in range(0, 2):
         for i in range(0, 5):
             bark_world.Step(0.2)
             eval_dict = bark_world.Evaluate()
             tracer.Trace(eval_dict, num_episode=j)
     self.assertEqual(len(tracer._states), 10)
Example #9
def run_configuration(argv):
    """ Main """
    params = ParameterServer(
        filename="examples/example_params/tfa_generate_params.json")
    # params = ParameterServer()
    output_dir = params["GenerateExpertTrajectories"]["OutputDirectory"]

    # create environment
    blueprint = params["World"]["Blueprint"]
    if blueprint == 'merging':
        bp = ContinuousMergingBlueprint(params,
                                        number_of_senarios=2500,
                                        random_seed=0)
    elif blueprint == 'highway':
        bp = ContinuousHighwayBlueprint(params,
                                        number_of_senarios=2500,
                                        random_seed=0)
    else:
        raise ValueError(f'{blueprint} is not a valid blueprint.')

    env = SingleAgentRuntime(blueprint=bp, render=False)

    sac_agent = BehaviorSACAgent(environment=env, params=params)
    env.ml_behavior = sac_agent
    runner = SACRunnerGenerator(params=params,
                                environment=env,
                                agent=sac_agent)

    if FLAGS.mode == "train":
        runner.SetupSummaryWriter()
        runner.Train()
    elif FLAGS.mode == "visualize":
        runner.Visualize(params["Visualization"]["NumberOfEpisodes"])
    elif FLAGS.mode == "generate":
        expert_trajectories = runner.GenerateExpertTrajectories(
            num_trajectories=params["GenerateExpertTrajectories"]
            ["NumberOfTrajectories"],
            render=params["World"]["render"])
        save_expert_trajectories(output_dir=output_dir,
                                 expert_trajectories=expert_trajectories)

    # store all used params of the training
    # params.Save(os.path.join(Path.home(), "examples/example_params/tfa_params.json"))
    sys.exit(0)
Example #10
    def test_agent_and_runner(self):
        params = ParameterServer()
        bp = ContinuousHighwayBlueprint(params,
                                        num_scenarios=10,
                                        random_seed=0)
        env = SingleAgentRuntime(blueprint=bp, render=False)
        agent = BehaviorPPOAgent(environment=env, params=params)

        # shorten training and evaluation for the test
        params["ML"]["PPORunner"]["NumberOfCollections"] = 2
        params["ML"]["SACRunner"]["NumberOfCollections"] = 2
        params["ML"]["TFARunner"]["EvaluationSteps"] = 2

        # set agent
        env.ml_behavior = agent
        self.assertEqual(env.ml_behavior.set_action_externally, False)
        runner = PPORunner(params=params, environment=env, agent=agent)
        runner.Train()
        self.assertEqual(env.ml_behavior.set_action_externally, True)
        runner.Run()
        self.assertEqual(env.ml_behavior.set_action_externally, True)
Example #11
def run_configuration(argv):
    params = ParameterServer(
        filename="examples/example_params/tfa_params.json")
    # params = ParameterServer()
    # NOTE: Modify these paths in order to save the checkpoints and summaries
    # params["ML"]["BehaviorTFAAgents"]["CheckpointPath"] = "/Users/hart/Development/bark-ml/checkpoints_merging_nn/"
    # params["ML"]["TFARunner"]["SummaryPath"] = "/Users/hart/Development/bark-ml/checkpoints_merging_nn/"
    params["Visualization"]["Agents"]["Alpha"]["Other"] = 0.2
    params["Visualization"]["Agents"]["Alpha"]["Controlled"] = 0.2
    params["ML"]["VisualizeCfWorlds"] = False
    params["ML"]["VisualizeCfHeatmap"] = True
    params["World"]["remove_agents_out_of_map"] = False

    viewer = MPViewer(params=params,
                      x_range=[-35, 35],
                      y_range=[-35, 35],
                      follow_agent_id=True)

    # create environment
    bp = ContinuousMergingBlueprint(params, num_scenarios=10000, random_seed=0)
    env = SingleAgentRuntime(blueprint=bp, render=False, viewer=viewer)

    # PPO-agent
    # ppo_agent = BehaviorPPOAgent(environment=env,
    #                              params=params)
    # env.ml_behavior = ppo_agent
    # runner = PPORunner(params=params,
    #                    environment=env,
    #                    agent=ppo_agent)

    # SAC-agent
    sac_agent = BehaviorSACAgent(environment=env, params=params)
    env.ml_behavior = sac_agent
    runner = SACRunner(params=params, environment=env, agent=sac_agent)
    if FLAGS.mode == "train":
        runner.SetupSummaryWriter()
        runner.Train()
    elif FLAGS.mode == "visualize":
        runner.Run(num_episodes=50, render=True)
    elif FLAGS.mode == "evaluate":
        runner.Run(num_episodes=100, render=False)
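Example #11 renders through an MPViewer; in BARK that class (and the VideoRenderer commented out in other examples) typically comes from the viewer modules below. The paths are assumptions based on the BARK layout:

from bark.runtime.viewer.matplotlib_viewer import MPViewer
from bark.runtime.viewer.video_renderer import VideoRenderer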
Example #12
    def test_behavior_wrapping(self):
        # create scenario
        params = ParameterServer()
        bp = ContinuousHighwayBlueprint(params,
                                        num_scenarios=10,
                                        random_seed=0)
        env = SingleAgentRuntime(blueprint=bp, render=False)
        ml_behaviors = []
        ml_behaviors.append(BehaviorPPOAgent(environment=env, params=params))
        ml_behaviors.append(BehaviorSACAgent(environment=env, params=params))

        for ml_behavior in ml_behaviors:
            # set agent
            env.ml_behavior = ml_behavior
            env.reset()
            done = False
            while not done:
                action = np.random.uniform(low=-0.1, high=0.1, size=(2, ))
                observed_next_state, reward, done, info = env.step(action)
                print(
                    f"Observed state: {observed_next_state}, Reward: {reward}, Done: {done}"
                )

            # action is set externally
            ml_behavior._set_action_externally = True
            agent_id = list(env._world.agents.keys())[0]
            observed_world = env._world.Observe([agent_id])[0]
            action = np.random.uniform(low=-0.1, high=0.1, size=(2, ))
            ml_behavior.ActionToBehavior(action)
            a = ml_behavior.Plan(0.2, observed_world)
            action = np.random.uniform(low=-0.1, high=0.1, size=(2, ))
            ml_behavior.ActionToBehavior(action)
            b = ml_behavior.Plan(0.2, observed_world)
            self.assertEqual(np.any(np.not_equal(a, b)), True)

            # action will be calculated within the Plan(..) fct.
            a = ml_behavior.Plan(0.2, observed_world)
            b = ml_behavior.Plan(0.2, observed_world)
            np.testing.assert_array_equal(a, b)
Example #13
    def test_behavior_wrapping(self):
        # create scenario
        params = ParameterServer()
        bp = ContinuousHighwayBlueprint(params,
                                        number_of_senarios=10,
                                        random_seed=0)
        env = SingleAgentRuntime(blueprint=bp, render=True)
        ml_behaviors = []
        ml_behaviors.append(BehaviorPPOAgent(environment=env, params=params))
        ml_behaviors.append(BehaviorSACAgent(environment=env, params=params))

        for ml_behavior in ml_behaviors:
            # set agent
            env.ml_behavior = ml_behavior
            env.reset()
            done = False
            while not done:
                action = np.random.uniform(low=-0.1, high=0.1, size=(2, ))
                observed_next_state, reward, done, info = env.step(action)
                print(
                    f"Observed state: {observed_next_state}, Reward: {reward}, Done: {done}"
                )
Example #14
def run_configuration(argv):
    params = ParameterServer()

    # NOTE: Modify these paths to specify your preferred path for checkpoints and summaries
    # params["ML"]["BehaviorTFAAgents"]["CheckpointPath"] = "/Users/hart/Development/bark-ml/checkpoints_merge_spektral_att2/"
    # params["ML"]["TFARunner"]["SummaryPath"] = "/Users/hart/Development/bark-ml/checkpoints_merge_spektral_att2/"

    #viewer = MPViewer(
    #  params=params,
    #  x_range=[-35, 35],
    #  y_range=[-35, 35],
    #  follow_agent_id=True)
    #viewer = VideoRenderer(
    #  renderer=viewer,
    #  world_step_time=0.2,
    #  fig_path="/your_path_here/training/video/")

    # create environment
    bp = ContinuousMergingBlueprint(params, num_scenarios=2500, random_seed=0)

    observer = GraphObserver(params=params)

    env = SingleAgentRuntime(blueprint=bp, observer=observer, render=False)
    sac_agent = BehaviorGraphSACAgent(environment=env,
                                      observer=observer,
                                      params=params,
                                      init_gnn='init_interaction_network')
    env.ml_behavior = sac_agent
    runner = SACRunner(params=params, environment=env, agent=sac_agent)

    if FLAGS.mode == "train":
        runner.SetupSummaryWriter()
        runner.Train()
    elif FLAGS.mode == "visualize":
        runner.Run(num_episodes=10, render=True)
    elif FLAGS.mode == "evaluate":
        runner.Run(num_episodes=250, render=False)