def test_reward_shaping_evaluator(self):
    params = ParameterServer()
    bp = ContinuousHighwayBlueprint(params)
    env = SingleAgentRuntime(blueprint=bp, render=True)
    env.reset()
    world = env._world

    eval_id = env._scenario._eval_agent_ids[0]
    observed_world = world.Observe([eval_id])[0]
    evaluator = RewardShapingEvaluator(params)
    action = np.array([0., 0.], dtype=np.float32)
    start_time = time.time()
    print(evaluator.Evaluate(observed_world, action))
    end_time = time.time()
    print(f"The reward shaping evaluator took {end_time-start_time} seconds.")
Example 2
 def __init__(self,
              num_scenarios=3,
              dump_dir=None,
              render=False,
              params=ParameterServer()):
     """Inits DataGenerator with the parameters (see class definition)."""
     self._dump_dir = dump_dir
     self._num_scenarios = num_scenarios
     self._params = params
     self._bp = ContinuousHighwayBlueprint(self._params,
                                            number_of_senarios=self._num_scenarios,
                                            random_seed=0)
     self._observer = GraphObserver(params=self._params)
     self._env = SingleAgentRuntime(blueprint=self._bp,
                                    observer=self._observer,
                                    render=render)
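
# Hedged usage sketch: constructing the generator defined above with explicit
# arguments (the enclosing DataGenerator class is assumed from its docstring;
# only the constructor shown here is known).
generator = DataGenerator(num_scenarios=5, dump_dir="/tmp/expert_data", render=False)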
Example 3
def run_configuration(argv):
    # Uncomment one of the following default parameter filename definitions,
    # depending on which GNN library you'd like to use.

    # File with standard parameters for tf2_gnn use:
    # param_filename = "examples/example_params/tfa_sac_gnn_tf2_gnn_default.json"

    # File with standard parameters for spektral use:
    param_filename = "examples/example_params/tfa_sac_gnn_spektral_default.json"
    params = ParameterServer(filename=param_filename)

    # NOTE: Modify these paths to specify your preferred path for checkpoints and summaries
    # params["ML"]["BehaviorTFAAgents"]["CheckpointPath"] = "YOUR_PATH"
    # params["ML"]["TFARunner"]["SummaryPath"] = "YOUR_PATH"

    #viewer = MPViewer(
    #  params=params,
    #  x_range=[-35, 35],
    #  y_range=[-35, 35],
    #  follow_agent_id=True)

    #viewer = VideoRenderer(
    #  renderer=viewer,
    #  world_step_time=0.2,
    #  fig_path="/your_path_here/training/video/")

    # create environment
    bp = ContinuousHighwayBlueprint(params,
                                    number_of_senarios=2500,
                                    random_seed=0)

    observer = GraphObserver(params=params)

    env = SingleAgentRuntime(blueprint=bp, observer=observer, render=False)

    sac_agent = BehaviorGraphSACAgent(environment=env,
                                      observer=observer,
                                      params=params)
    env.ml_behavior = sac_agent
    runner = SACRunner(params=params, environment=env, agent=sac_agent)

    if FLAGS.mode == "train":
        runner.SetupSummaryWriter()
        runner.Train()
    elif FLAGS.mode == "visualize":
        runner.Visualize(5)
    elif FLAGS.mode == "evaluate":
        runner.Evaluate()
Example 4
  def test_nearest_observer(self):
    params = ParameterServer()
    bp = ContinuousHighwayBlueprint(params)
    env = SingleAgentRuntime(blueprint=bp, render=True)
    env.reset()
    world = env._world

    # under test
    observer = NearestAgentsObserver(params)

    eval_id = env._scenario._eval_agent_ids[0]
    observed_world = world.Observe([eval_id])[0]
    start_time = time.time()
    observed_state = observer.Observe(observed_world)
    end_time = time.time()
    print(f"It took {end_time-start_time} seconds.")
    print(observed_state, observer.observation_space.shape)
Example 5
 def test_tracing_bark_world(self):
     params = ParameterServer()
     bp = ContinuousHighwayBlueprint(params)
     tracer = Tracer()
     env = SingleAgentRuntime(blueprint=bp, render=False)
     sac_agent = BehaviorSACAgent(environment=env, params=params)
     env.ml_behavior = sac_agent
     # NOTE: this also tests if a BARK agent is self-contained
     env.ml_behavior.set_action_externally = False
     env.reset()
     bark_world = env._world
     for j in range(0, 2):
         for i in range(0, 5):
             bark_world.Step(0.2)
             eval_dict = bark_world.Evaluate()
             tracer.Trace(eval_dict, num_episode=j)
     self.assertEqual(len(tracer._states), 10)
Example 6
def run_configuration(argv):
    """ Main """
    params = ParameterServer(
        filename="examples/example_params/tfa_generate_params.json")
    # params = ParameterServer()
    output_dir = params["GenerateExpertTrajectories"]["OutputDirectory"]

    # create environment
    blueprint = params["World"]["Blueprint"]
    if blueprint == 'merging':
        bp = ContinuousMergingBlueprint(params,
                                        number_of_senarios=2500,
                                        random_seed=0)
    elif blueprint == 'highway':
        bp = ContinuousHighwayBlueprint(params,
                                        number_of_senarios=2500,
                                        random_seed=0)
    else:
        raise ValueError(f'{blueprint} is not a valid blueprint.')

    env = SingleAgentRuntime(blueprint=bp, render=False)

    sac_agent = BehaviorSACAgent(environment=env, params=params)
    env.ml_behavior = sac_agent
    runner = SACRunnerGenerator(params=params,
                                environment=env,
                                agent=sac_agent)

    if FLAGS.mode == "train":
        runner.SetupSummaryWriter()
        runner.Train()
    elif FLAGS.mode == "visualize":
        runner.Visualize(params["Visualization"]["NumberOfEpisodes"])
    elif FLAGS.mode == "generate":
        expert_trajectories = runner.GenerateExpertTrajectories(
            num_trajectories=params["GenerateExpertTrajectories"]
            ["NumberOfTrajectories"],
            render=params["World"]["render"])
        save_expert_trajectories(output_dir=output_dir,
                                 expert_trajectories=expert_trajectories)

    # store all used params of the training
    # params.Save(os.path.join(Path.home(), "examples/example_params/tfa_params.json"))
    sys.exit(0)
Example 7
    def test_agent_and_runner(self):
        params = ParameterServer()
        bp = ContinuousHighwayBlueprint(params,
                                        num_scenarios=10,
                                        random_seed=0)
        env = SingleAgentRuntime(blueprint=bp, render=False)
        agent = BehaviorPPOAgent(environment=env, params=params)

        # set agent
        params["ML"]["PPORunner"]["NumberOfCollections"] = 2
        params["ML"]["SACRunner"]["NumberOfCollections"] = 2
        params["ML"]["TFARunner"]["EvaluationSteps"] = 2
        env.ml_behavior = agent
        self.assertEqual(env.ml_behavior.set_action_externally, False)
        runner = PPORunner(params=params, environment=env, agent=agent)
        runner.Train()
        self.assertEqual(env.ml_behavior.set_action_externally, True)
        runner.Run()
        self.assertEqual(env.ml_behavior.set_action_externally, True)
Example 8
    def test_behavior_wrapping(self):
        # create scenario
        params = ParameterServer()
        bp = ContinuousHighwayBlueprint(params,
                                        num_scenarios=10,
                                        random_seed=0)
        env = SingleAgentRuntime(blueprint=bp, render=False)
        ml_behaviors = []
        ml_behaviors.append(BehaviorPPOAgent(environment=env, params=params))
        ml_behaviors.append(BehaviorSACAgent(environment=env, params=params))

        for ml_behavior in ml_behaviors:
            # set agent
            env.ml_behavior = ml_behavior
            env.reset()
            done = False
            while not done:
                action = np.random.uniform(low=-0.1, high=0.1, size=(2, ))
                observed_next_state, reward, done, info = env.step(action)
                print(
                    f"Observed state: {observed_next_state}, Reward: {reward}, Done: {done}"
                )

            # action is set externally
            ml_behavior._set_action_externally = True
            agent_id = list(env._world.agents.keys())[0]
            observed_world = env._world.Observe([agent_id])[0]
            action = np.random.uniform(low=-0.1, high=0.1, size=(2, ))
            ml_behavior.ActionToBehavior(action)
            a = ml_behavior.Plan(0.2, observed_world)
            action = np.random.uniform(low=-0.1, high=0.1, size=(2, ))
            ml_behavior.ActionToBehavior(action)
            b = ml_behavior.Plan(0.2, observed_world)
            self.assertEqual(np.any(np.not_equal(a, b)), True)

            # action will be calculated within the Plan(..) fct.
            a = ml_behavior.Plan(0.2, observed_world)
            b = ml_behavior.Plan(0.2, observed_world)
            np.testing.assert_array_equal(a, b)
Example 9
    def test_tracer(self):
        params = ParameterServer()
        bp = ContinuousHighwayBlueprint(params)
        tracer = Tracer()
        env = SingleAgentRuntime(blueprint=bp, render=False)
        for i in range(0, 2):
            env.reset()
            for _ in range(0, 10):
                action = np.random.uniform(low=-0.1, high=0.1, size=(2, ))
                data = env.step(action)
                tracer.Trace(data, num_episode=i)

        # NOTE: test basic tracing
        self.assertEqual(len(tracer._states), 20)
        for i in range(0, 20):
            self.assertEqual("is_terminal" in tracer._states[i].keys(), True)
            self.assertEqual("reward" in tracer._states[i].keys(), True)
            self.assertEqual("collision" in tracer._states[i].keys(), True)
            self.assertEqual("drivable_area" in tracer._states[i].keys(), True)
            self.assertEqual("goal_reached" in tracer._states[i].keys(), True)
            self.assertEqual("step_count" in tracer._states[i].keys(), True)

        # NOTE: test pandas magic
        tracer.ConvertToDf()
        # average collisions
        print(
            tracer.Query(key="collision",
                         group_by="num_episode",
                         agg_type="MEAN").mean())
        # average reward
        print(
            tracer.Query(key="reward", group_by="num_episode",
                         agg_type="SUM").mean())

        # NOTE: test reset
        tracer.Reset()
        self.assertEqual(len(tracer._states), 0)
        self.assertEqual(tracer._df, None)
Example 10
    def test_behavior_wrapping(self):
        # create scenario
        params = ParameterServer()
        bp = ContinuousHighwayBlueprint(params,
                                        number_of_senarios=10,
                                        random_seed=0)
        env = SingleAgentRuntime(blueprint=bp, render=True)
        ml_behaviors = []
        ml_behaviors.append(BehaviorPPOAgent(environment=env, params=params))
        ml_behaviors.append(BehaviorSACAgent(environment=env, params=params))

        for ml_behavior in ml_behaviors:
            # set agent
            env.ml_behavior = ml_behavior
            env.reset()
            done = False
            while not done:
                action = np.random.uniform(low=-0.1, high=0.1, size=(2, ))
                observed_next_state, reward, done, info = env.step(action)
                print(
                    f"Observed state: {observed_next_state}, Reward: {reward}, Done: {done}"
                )
Example 11
    def _configurable_setup(self, params_filename):
        """Configurable GNN setup depending on a given filename

    Args:
      params_filename: str, corresponds to path of params file

    Returns:
      params: ParameterServer instance
      observer: GraphObserver instance
      actor: ActorNetwork of BehaviorGraphSACAgent
    """
        params = ParameterServer(filename=params_filename)
        observer = GraphObserver(params=params)
        bp = ContinuousHighwayBlueprint(params,
                                        number_of_senarios=2,
                                        random_seed=0)
        env = SingleAgentRuntime(blueprint=bp, observer=observer, render=False)
        # Get GNN SAC actor net
        sac_agent = BehaviorGraphSACAgent(environment=env,
                                          observer=observer,
                                          params=params)
        actor = sac_agent._agent._actor_network
        return params, observer, actor
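
        # Hedged usage sketch (the enclosing test class is an assumption), reusing
        # the spektral parameter file from Example 3:
        #   params, observer, actor = self._configurable_setup(
        #       "examples/example_params/tfa_sac_gnn_spektral_default.json")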
Example 12
# BARK imports
from bark.runtime.commons.parameters import ParameterServer
from bark.runtime.viewer.matplotlib_viewer import MPViewer
from bark.runtime.viewer.video_renderer import VideoRenderer

# BARK-ML imports
from bark_ml.environments.single_agent_runtime import SingleAgentRuntime
from bark_ml.core.observers import NearestObserver
from bark_ml.environments.blueprints import ContinuousHighwayBlueprint, \
  DiscreteHighwayBlueprint

# create scenario
params = ParameterServer()
bp = ContinuousHighwayBlueprint(params,
                                number_of_senarios=10,
                                random_seed=0)
# bp = DiscreteHighwayBlueprint(params,
#                               number_of_senarios=10,
#                               random_seed=0)


# Arguments that are additionally set in the runtime overwrite the ones of the
# blueprint; e.g., we can swap in the C++ (cpp) observer.
observer = NearestObserver(params)
# viewer = MPViewer(params=params,
#                   x_range=[-35, 35],
#                   y_range=[-35, 35],
#                   follow_agent_id=True)
# viewer = VideoRenderer(renderer=viewer,
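
# Hedged sketch of how the runtime could be assembled from the pieces above: the
# observer passed here overrides the blueprint's default one (same pattern as the
# GraphObserver examples earlier); the viewer wiring above is left commented out.
env = SingleAgentRuntime(blueprint=bp, observer=observer, render=True)
env.reset()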
Example 13
def run_configuration(argv):
    params = ParameterServer(
        filename="examples/example_params/gail_params.json")

    # Uncomment these to use the pretrained agents from https://github.com/GAIL-4-BARK/large_data_store
    # The agents are automatically integrated using bazel together with the expert trajectories
    # params["ML"]["GAILRunner"]["tf2rl"]["logdir"] = "../com_github_gail_4_bark_large_data_store/pretrained_agents/gail/merging"
    # params["ML"]["GAILRunner"]["tf2rl"]["model_dir"] = "../com_github_gail_4_bark_large_data_store/pretrained_agents/gail/merging"

    # When training a GAIL agent, a suffix is added to the specified model and log dir to distinguish between training runs.
    # If you want to visualize or evaluate using your locally trained GAIL agent, you have to specify which run to use.
    # To do so, look into the directory specified in params["ML"]["GAILRunner"]["tf2rl"]["logdir"] and
    # pick one of your runs with the naming scheme '<timestamp>_DDPG_GAIL'.
    # Copy that folder to a location outside of bazel-bin, as these folders get deleted when bazel is run again.
    #
    # Replace the params["ML"]["GAILRunner"]["tf2rl"]["logdir"] and params["ML"]["GAILRunner"]["tf2rl"]["model_dir"]
    # in your example_params/gail_params.json definition with the path where you placed the trained agent.
    #
    # Alternatively set it from this script as in the following lines:
    # params["ML"]["GAILRunner"]["tf2rl"]["logdir"] = <insert-your-path>
    # params["ML"]["GAILRunner"]["tf2rl"]["model_dir"] = <insert-your-path>

    # create environment
    blueprint = params['World']['blueprint']
    if blueprint == 'merging':
        bp = ContinuousMergingBlueprint(params,
                                        number_of_senarios=2500,
                                        random_seed=0)
    elif blueprint == 'highway':
        bp = ContinuousHighwayBlueprint(params,
                                        number_of_senarios=2500,
                                        random_seed=0)
    else:
        raise ValueError(f'{blueprint} is not a valid blueprint.')

    env = SingleAgentRuntime(blueprint=bp, render=False)

    # wrapped environment for compatibility with tf2rl
    wrapped_env = TF2RLWrapper(
        env, normalize_features=params["ML"]["Settings"]["NormalizeFeatures"])

    # GAIL-agent
    gail_agent = BehaviorGAILAgent(environment=wrapped_env, params=params)

    # np.random.seed(123456789)
    expert_trajectories = None
    if FLAGS.mode != 'visualize':
        expert_trajectories, avg_trajectory_length, num_trajectories = load_expert_trajectories(
            params['ML']['ExpertTrajectories']['expert_path_dir'],
            normalize_features=params["ML"]["Settings"]["NormalizeFeatures"],
            # the unwrapped env has to be used, since that contains the unnormalized spaces.
            env=env,
            subset_size=params['ML']['ExpertTrajectories']['subset_size'])

    runner = GAILRunner(params=params,
                        environment=wrapped_env,
                        agent=gail_agent,
                        expert_trajs=expert_trajectories)

    if FLAGS.mode == "train":
        runner.Train()
    elif FLAGS.mode == "visualize":
        runner.Visualize(params["Visualization"]["NumberOfScenarios"])
    elif FLAGS.mode == "evaluate":
        runner.Evaluate(expert_trajectories, avg_trajectory_length,
                        num_trajectories)