Example #1
def run_configuration(argv):
    # Uncomment one of the following default parameter filename definitions,
    # depending on which GNN library you'd like to use.

    # File with standard parameters for tf2_gnn use:
    # param_filename = "examples/example_params/tfa_sac_gnn_tf2_gnn_default.json"

    # File with standard parameters for spektral use:
    param_filename = "examples/example_params/tfa_sac_gnn_spektral_default.json"
    params = ParameterServer(filename=param_filename)

    # NOTE: Modify these paths to specify your preferred path for checkpoints and summaries
    # params["ML"]["BehaviorTFAAgents"]["CheckpointPath"] = "YOUR_PATH"
    # params["ML"]["TFARunner"]["SummaryPath"] = "YOUR_PATH"

    #viewer = MPViewer(
    #  params=params,
    #  x_range=[-35, 35],
    #  y_range=[-35, 35],
    #  follow_agent_id=True)

    #viewer = VideoRenderer(
    #  renderer=viewer,
    #  world_step_time=0.2,
    #  fig_path="/your_path_here/training/video/")

    # create environment
    bp = ContinuousHighwayBlueprint(params,
                                    number_of_senarios=2500,
                                    random_seed=0)

    observer = GraphObserver(params=params)

    env = SingleAgentRuntime(blueprint=bp, observer=observer, render=False)

    sac_agent = BehaviorGraphSACAgent(environment=env,
                                      observer=observer,
                                      params=params)
    env.ml_behavior = sac_agent
    runner = SACRunner(params=params, environment=env, agent=sac_agent)

    if FLAGS.mode == "train":
        runner.SetupSummaryWriter()
        runner.Train()
    elif FLAGS.mode == "visualize":
        runner.Visualize(5)
    elif FLAGS.mode == "evaluate":
        runner.Evaluate()
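FLAGS.mode is assumed to come from an absl flag defined elsewhere in the script, which is not shown in this excerpt. A minimal entry-point sketch (flag name, default, and help text are assumptions, not copied from bark-ml) could look like this:

from absl import app
from absl import flags

FLAGS = flags.FLAGS
# Hypothetical flag definition mirroring the modes handled in run_configuration.
flags.DEFINE_enum("mode", "visualize",
                  ["train", "visualize", "evaluate"],
                  "Mode in which the configuration is executed.")

if __name__ == "__main__":
  app.run(run_configuration)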
Example #2
def configurable_setup(params, num_scenarios, graph_sac=True):
  """Configurable GNN setup depending on a given filename

  Args:
    params: ParameterServer instance

  Returns: 
    observer: GraphObserver instance
    actor: ActorNetwork of BehaviorGraphSACAgent
  """
  observer = GraphObserver(params=params)
  bp = ContinuousHighwayBlueprint(params,
                                  number_of_senarios=num_scenarios,
                                  random_seed=0)
  env = SingleAgentRuntime(blueprint=bp, observer=observer,
                            render=False)
  if graph_sac:
    # Get GNN SAC actor net
    sac_agent = BehaviorGraphSACAgent(environment=env, observer=observer,
                                      params=params)
  else:
    sac_agent = BehaviorSACAgent(environment=env, params=params)

  actor = sac_agent._agent._actor_network
  return observer, actor
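A brief usage sketch of configurable_setup (the parameter values are illustrative assumptions):

params = ParameterServer()
# Build the GNN-based SAC actor; pass graph_sac=False to get the plain SAC actor instead.
observer, actor = configurable_setup(params, num_scenarios=2, graph_sac=True)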
Example #3
  def test_gnn_parameters(self):
    params = ParameterServer()
    params["ML"]["BehaviorGraphSACAgent"]["GNN"]["NumMpLayers"] = 4
    params["ML"]["BehaviorGraphSACAgent"]["GNN"]["MpLayerNumUnits"] = 64
    params["ML"]["BehaviorGraphSACAgent"]["GNN"]["message_calculation_class"] = "gnn_edge_mlp"
    params["ML"]["BehaviorGraphSACAgent"]["GNN"]["global_exchange_mode"] = "mean"
    
    gnn_library = GNNWrapper.SupportedLibrary.spektral
    params["ML"]["BehaviorGraphSACAgent"]["GNN"]["Library"] = gnn_library

    
    bp = ContinuousHighwayBlueprint(params, number_of_senarios=2500, random_seed=0)
    observer = GraphObserver(params=params)
    env = SingleAgentRuntime(blueprint=bp, observer=observer, render=False)
    sac_agent = BehaviorGraphSACAgent(environment=env, observer=observer, params=params)

    actor_gnn = sac_agent._agent._actor_network._gnn
    critic_gnn = sac_agent._agent._critic_network_1._gnn

    for gnn in [actor_gnn, critic_gnn]:
      self.assertEqual(gnn._params["NumMpLayers"], 4)
      self.assertEqual(gnn._params["MpLayerNumUnits"], 64)
      self.assertEqual(gnn._params["message_calculation_class"], "gnn_edge_mlp")
      self.assertEqual(gnn._params["global_exchange_mode"], "mean")
      self.assertEqual(gnn._params["Library"], gnn_library)
Example #4
 def test_sac_graph_agent(self):
     params = ParameterServer()
     bp = ContinuousMergingBlueprint(params,
                                     number_of_senarios=2500,
                                     random_seed=0)
     observer = GraphObserver(params=params)
     env = SingleAgentRuntime(blueprint=bp, observer=observer, render=False)
     sac_agent = BehaviorGraphSACAgent(environment=env,
                                       observer=observer,
                                       params=params)
     env.ml_behavior = sac_agent
     env.reset()
     eval_id = env._scenario._eval_agent_ids[0]
     self.assertEqual(env._world.agents[eval_id].behavior_model, sac_agent)
     for _ in range(0, 5):
         env._world.Step(0.2)
Example #5
 def __init__(self,
              blueprint=None,
              ml_behavior=None,
              observer=None,
              evaluator=None,
              step_time=None,
              viewer=None,
              scenario_generator=None,
              render=False,
              max_col_rate=0.1,
              behavior_model_pool=None,
              ego_rule_based=None,
              params=None):
   SingleAgentRuntime.__init__(
     self,
     blueprint=blueprint,
     ml_behavior=ml_behavior,
     observer=observer,
     evaluator=evaluator,
     step_time=step_time,
     viewer=viewer,
     scenario_generator=scenario_generator,
     render=render)
   self._params = params or ParameterServer()
   self._max_col_rate = params["ML"][
     "MaxColRate",
     "Max. collision rate allowed over all counterfactual worlds.", 0.1]
   self._cf_simulation_steps = params["ML"][
     "CfSimSteps",
     "Simulation steps for the counterfactual worlds.", 5]
   self._visualize_cf_worlds = params["ML"][
     "VisualizeCfWorlds",
     "Whether the counterfactual worlds are visualized.", False]
   self._visualize_heatmap = params["ML"][
     "VisualizeCfHeatmap",
     "Whether the heatmap is being visualized.", False]
   self._results_folder = params["ML"][
     "ResultsFolder",
     "Whether the heatmap is being visualized.", "./"]
   self._logger = logging.getLogger()
   self._behavior_model_pool = behavior_model_pool or []
   self._ego_rule_based = ego_rule_based or BehaviorIDMLaneTracking(self._params)
   self._tracer = Tracer()
   if self._visualize_heatmap:
     _, self._axs_heatmap = plt.subplots(1, 1, constrained_layout=True)
   self._count = 0
   self._cf_axs = {}
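Assuming the constructor above belongs to a counterfactual runtime class (called CounterfactualRuntime here purely for illustration; the class name is not shown in this excerpt), a minimal instantiation sketch could look like this:

params = ParameterServer()
bp = ContinuousMergingBlueprint(params, number_of_senarios=10, random_seed=0)
runtime = CounterfactualRuntime(
  blueprint=bp,
  render=False,
  params=params,
  # pool of alternative behavior models used to spawn the counterfactual worlds
  behavior_model_pool=[BehaviorIDMLaneTracking(params)])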
Example #6
 def test_tracing_bark_world(self):
     params = ParameterServer()
     bp = ContinuousHighwayBlueprint(params)
     tracer = Tracer()
     env = SingleAgentRuntime(blueprint=bp, render=False)
     sac_agent = BehaviorSACAgent(environment=env, params=params)
     env.ml_behavior = sac_agent
     # NOTE: this also tests if a BARK agent is self-contained
     env.ml_behavior.set_actions_externally = False
     env.reset()
     bark_world = env._world
     for j in range(0, 2):
         for i in range(0, 5):
             bark_world.Step(0.2)
             eval_dict = bark_world.Evaluate()
             tracer.Trace(eval_dict, num_episode=j)
     self.assertEqual(len(tracer._states), 10)
Example #7
  def test_nearest_observer(self):
    params = ParameterServer()
    bp = ContinuousHighwayBlueprint(params)
    env = SingleAgentRuntime(blueprint=bp, render=True)
    env.reset()
    world = env._world

    # under test
    observer = NearestAgentsObserver(params)

    eval_id = env._scenario._eval_agent_ids[0]
    observed_world = world.Observe([eval_id])[0]
    start_time = time.time()
    observed_state = observer.Observe(observed_world)
    end_time = time.time()
    print(f"It took {end_time-start_time} seconds.")
    print(observed_state, observer.observation_space.shape)
Example #8
def run_configuration(argv):
    """ Main """
    params = ParameterServer(
        filename="examples/example_params/tfa_generate_params.json")
    # params = ParameterServer()
    output_dir = params["GenerateExpertTrajectories"]["OutputDirectory"]

    # create environment
    blueprint = params["World"]["Blueprint"]
    if blueprint == 'merging':
        bp = ContinuousMergingBlueprint(params,
                                        number_of_senarios=2500,
                                        random_seed=0)
    elif blueprint == 'highway':
        bp = ContinuousHighwayBlueprint(params,
                                        number_of_senarios=2500,
                                        random_seed=0)
    else:
        raise ValueError(f'{blueprint} is not a valid blueprint.')

    env = SingleAgentRuntime(blueprint=bp, render=False)

    sac_agent = BehaviorSACAgent(environment=env, params=params)
    env.ml_behavior = sac_agent
    runner = SACRunnerGenerator(params=params,
                                environment=env,
                                agent=sac_agent)

    if FLAGS.mode == "train":
        runner.SetupSummaryWriter()
        runner.Train()
    elif FLAGS.mode == "visualize":
        runner.Visualize(params["Visualization"]["NumberOfEpisodes"])
    elif FLAGS.mode == "generate":
        expert_trajectories = runner.GenerateExpertTrajectories(
            num_trajectories=params["GenerateExpertTrajectories"]
            ["NumberOfTrajectories"],
            render=params["World"]["render"])
        save_expert_trajectories(output_dir=output_dir,
                                 expert_trajectories=expert_trajectories)

    # store all used params of the training
    # params.Save(os.path.join(Path.home(), "examples/example_params/tfa_params.json"))
    sys.exit(0)
Example #9
 def setUp(self):
   """Setup
   """
   self.params = ParameterServer(
     filename="bark_ml/tests/py_library_tf2rl_tests/data/params.json")
   bp = ContinuousMergingBlueprint(self.params,
                                   number_of_senarios=10,
                                   random_seed=0)
   self.env = SingleAgentRuntime(blueprint=bp,
                                 render=False)
Example #10
    def test_agent_and_runner(self):
        params = ParameterServer()
        bp = ContinuousHighwayBlueprint(params,
                                        num_scenarios=10,
                                        random_seed=0)
        env = SingleAgentRuntime(blueprint=bp, render=False)
        agent = BehaviorPPOAgent(environment=env, params=params)

        # set agent
        params["ML"]["PPORunner"]["NumberOfCollections"] = 2
        params["ML"]["SACRunner"]["NumberOfCollections"] = 2
        params["ML"]["TFARunner"]["EvaluationSteps"] = 2
        env.ml_behavior = agent
        self.assertEqual(env.ml_behavior.set_action_externally, False)
        runner = PPORunner(params=params, environment=env, agent=agent)
        runner.Train()
        self.assertEqual(env.ml_behavior.set_action_externally, True)
        runner.Run()
        self.assertEqual(env.ml_behavior.set_action_externally, True)
Example #11
def run_configuration(argv):
    params = ParameterServer(
        filename="examples/example_params/tfa_params.json")
    # params = ParameterServer()
    # NOTE: Modify these paths in order to save the checkpoints and summaries
    # params["ML"]["BehaviorTFAAgents"]["CheckpointPath"] = "/Users/hart/Development/bark-ml/checkpoints_merging_nn/"
    # params["ML"]["TFARunner"]["SummaryPath"] = "/Users/hart/Development/bark-ml/checkpoints_merging_nn/"
    params["Visualization"]["Agents"]["Alpha"]["Other"] = 0.2
    params["Visualization"]["Agents"]["Alpha"]["Controlled"] = 0.2
    params["Visualization"]["Agents"]["Alpha"]["Controlled"] = 0.2
    params["ML"]["VisualizeCfWorlds"] = False
    params["ML"]["VisualizeCfHeatmap"] = True
    params["World"]["remove_agents_out_of_map"] = False

    viewer = MPViewer(params=params,
                      x_range=[-35, 35],
                      y_range=[-35, 35],
                      follow_agent_id=True)

    # create environment
    bp = ContinuousMergingBlueprint(params, num_scenarios=10000, random_seed=0)
    env = SingleAgentRuntime(blueprint=bp, render=False, viewer=viewer)

    # PPO-agent
    # ppo_agent = BehaviorPPOAgent(environment=env,
    #                              params=params)
    # env.ml_behavior = ppo_agent
    # runner = PPORunner(params=params,
    #                    environment=env,
    #                    agent=ppo_agent)

    # SAC-agent
    sac_agent = BehaviorSACAgent(environment=env, params=params)
    env.ml_behavior = sac_agent
    runner = SACRunner(params=params, environment=env, agent=sac_agent)
    if FLAGS.mode == "train":
        runner.SetupSummaryWriter()
        runner.Train()
    elif FLAGS.mode == "visualize":
        runner.Run(num_episodes=50, render=True)
    elif FLAGS.mode == "evaluate":
        runner.Run(num_episodes=100, render=False)
Example #12
  def step(self, action):
    """perform the cf evaluation"""
    # simulate counterfactual worlds
    local_tracer = Tracer()
    eval_id = self._scenario._eval_agent_ids[0]
    self.St()
    cf_worlds = self.GenerateCounterfactualWorlds()
    for v in self._cf_axs.values():
      v["count"] = 0
    for i, cf_world in enumerate(cf_worlds):
      cf_key = list(cf_world.keys())[0]
      self.SimulateWorld(
        cf_world[cf_key], local_tracer, N=self._cf_simulation_steps,
        replaced_agent=cf_key, num_virtual_world=i)
    self.Et()

    # NOTE: this world would actually have the predicted traj.
    gt_world = self.ReplaceBehaviorModel()
    self.SimulateWorld(
      gt_world, local_tracer, N=self._cf_simulation_steps,
      replaced_agent="None", num_virtual_world="None")
    # NOTE: outsource
    hist = gt_world.agents[eval_id].history
    traj = np.stack([x[0] for x in hist])
    # self._viewer.drawTrajectory(traj, color='blue')

    if self._visualize_heatmap:
      self.DrawHeatmap(
        local_tracer,
        filename=self._results_folder + "cf_%03d" % self._count + "_heatmap")

    # evaluate counterfactual worlds
    trace = self.TraceCounterfactualWorldStats(local_tracer)
    collision_rate = trace['collision']/len(self._behavior_model_pool)
    print(collision_rate)
    self._logger.info(
      f"The counterfactual worlds have a collision rate of {collision_rate:.3f}.")

    # choose a policy
    executed_learned_policy = 1
    if collision_rate > self._max_col_rate:
      executed_learned_policy = 0
      self._logger.info(
        f"Executing fallback model.")
      self._world.agents[eval_id].behavior_model = self._ego_rule_based
    trace["executed_learned_policy"] = executed_learned_policy
    self._tracer.Trace(trace)
    self._count += 1
    for fig in self._cf_axs.values():
      for sub_ax in fig["ax"]:
        sub_ax.clear()
    return SingleAgentRuntime.step(self, action)
Example #13
    def test_tracer(self):
        params = ParameterServer()
        bp = ContinuousHighwayBlueprint(params)
        tracer = Tracer()
        env = SingleAgentRuntime(blueprint=bp, render=False)
        for i in range(0, 2):
            env.reset()
            for _ in range(0, 10):
                action = np.random.uniform(low=-0.1, high=0.1, size=(2, ))
                data = (observed_next_state, reward, done,
                        info) = env.step(action)
                tracer.Trace(data, num_episode=i)

        # NOTE: test basic tracing
        self.assertEqual(len(tracer._states), 20)
        for i in range(0, 20):
            self.assertEqual("is_terminal" in tracer._states[i].keys(), True)
            self.assertEqual("reward" in tracer._states[i].keys(), True)
            self.assertEqual("collision" in tracer._states[i].keys(), True)
            self.assertEqual("drivable_area" in tracer._states[i].keys(), True)
            self.assertEqual("goal_reached" in tracer._states[i].keys(), True)
            self.assertEqual("step_count" in tracer._states[i].keys(), True)

        # NOTE: test pandas magic
        tracer.ConvertToDf()
        # average collisions
        print(
            tracer.Query(key="collision",
                         group_by="num_episode",
                         agg_type="MEAN").mean())
        # average reward
        print(
            tracer.Query(key="reward", group_by="num_episode",
                         agg_type="SUM").mean())

        # NOTE: test reset
        tracer.Reset()
        self.assertEqual(len(tracer._states), 0)
        self.assertEqual(tracer._df, None)
Example #14
    def test_agents(self):
        params = ParameterServer()
        params["ML"]["BaseAgent"]["NumSteps"] = 2
        params["ML"]["BaseAgent"]["MaxEpisodeSteps"] = 2

        bp = DiscreteHighwayBlueprint(params,
                                      number_of_senarios=10,
                                      random_seed=0)
        env = SingleAgentRuntime(blueprint=bp, render=False)

        # IQN Agent
        # iqn_agent = IQNAgent(env=env, test_env=env, params=params)
        # env.ml_behavior = iqn_agent
        # self.assertEqual(env.ml_behavior.set_action_externally, False)
        # iqn_agent.run()
        # self.assertEqual(env.ml_behavior.set_action_externally, True)

        # FQF Agent
        fqf_agent = FQFAgent(env=env, params=params)
        env.ml_behavior = fqf_agent
        self.assertEqual(env.ml_behavior.set_action_externally, False)
        fqf_agent.train()
        self.assertEqual(env.ml_behavior.set_action_externally, True)
Example #15
def run_configuration(argv):
    params = ParameterServer()

    # NOTE: Modify these paths to specify your preferred path for checkpoints and summaries
    # params["ML"]["BehaviorTFAAgents"]["CheckpointPath"] = "/Users/hart/Development/bark-ml/checkpoints_merge_spektral_att2/"
    # params["ML"]["TFARunner"]["SummaryPath"] = "/Users/hart/Development/bark-ml/checkpoints_merge_spektral_att2/"

    #viewer = MPViewer(
    #  params=params,
    #  x_range=[-35, 35],
    #  y_range=[-35, 35],
    #  follow_agent_id=True)
    #viewer = VideoRenderer(
    #  renderer=viewer,
    #  world_step_time=0.2,
    #  fig_path="/your_path_here/training/video/")

    # create environment
    bp = ContinuousMergingBlueprint(params, num_scenarios=2500, random_seed=0)

    observer = GraphObserver(params=params)

    env = SingleAgentRuntime(blueprint=bp, observer=observer, render=False)
    sac_agent = BehaviorGraphSACAgent(environment=env,
                                      observer=observer,
                                      params=params,
                                      init_gnn='init_interaction_network')
    env.ml_behavior = sac_agent
    runner = SACRunner(params=params, environment=env, agent=sac_agent)

    if FLAGS.mode == "train":
        runner.SetupSummaryWriter()
        runner.Train()
    elif FLAGS.mode == "visualize":
        runner.Run(num_episodes=10, render=True)
    elif FLAGS.mode == "evaluate":
        runner.Run(num_episodes=250, render=False)
Example #16
 def test_general_evaluator(self):
   params = ParameterServer()
   bp = ContinuousSingleLaneBlueprint(params)
   env = SingleAgentRuntime(blueprint=bp, render=True)
   evaluator = GeneralEvaluator(params)
   env._evaluator = evaluator
   env.reset()
   for _ in range(0, 4):
     state, terminal, reward, info = env.step(np.array([0., 0.]))
     print(terminal, reward)
Example #17
    def test_behavior_wrapping(self):
        # create scenario
        params = ParameterServer()
        bp = DiscreteHighwayBlueprint(params,
                                      number_of_senarios=10,
                                      random_seed=0)
        env = SingleAgentRuntime(blueprint=bp, render=False)
        #env = gym.make("highway-v1", params=params)
        ml_behaviors = []
        # ml_behaviors.append(IQNAgent(env=env, test_env=env, params=params))
        ml_behaviors.append(FQFAgent(env=env, params=params))
        # ml_behaviors.append(QRDQNAgent(env=env, test_env=env, params=params))

        for ml_behavior in ml_behaviors:
            # set agent
            env.ml_behavior = ml_behavior
            env.reset()
            action = np.random.randint(low=0, high=env.action_space.n)
            observed_next_state, reward, done, info = env.step(action)
            print(
                f"Observed state: {observed_next_state}, Reward: {reward}, Done: {done}"
            )

            # action is set externally
            ml_behavior._set_action_externally = True
            agent_id = list(env._world.agents.keys())[0]
            observed_world = env._world.Observe([agent_id])[0]

            # do a random action and plan trajectory
            action = np.random.randint(low=1, high=env.action_space.n)
            ml_behavior.ActionToBehavior(action)
            a = ml_behavior.Plan(0.2, observed_world)

            # sample another different random action
            another_action = action
            while another_action == action:
                another_action = np.random.randint(low=1,
                                                   high=env.action_space.n)

            # plan trajectory for the another action
            ml_behavior.ActionToBehavior(another_action)
            b = ml_behavior.Plan(0.2, observed_world)

            # the trajectories generated by two different actions should be different
            self.assertEqual(np.any(np.not_equal(a, b)), True)

            # action will be calculated within the Plan(..) fct.
            ml_behavior._set_action_externally = False
            a = ml_behavior.Plan(0.2, observed_world)
            b = ml_behavior.Plan(0.2, observed_world)
            last_action = ml_behavior.GetLastAction()
            self.assertTrue(isinstance(last_action, float))

            # same trajectory for same state
            np.testing.assert_array_equal(a, b)
Example #18
    def setUp(self):
        """Setup."""
        self.params = ParameterServer(
            filename=os.path.join(os.path.dirname(__file__),
                                  "gail_data/params/gail_params_bark.json"))

        local_params = self.params["ML"]["GAILRunner"]["tf2rl"]

        # creating the dirs for logging if they are not present already:
        for key in ['logdir', 'model_dir', 'expert_path_dir']:
            local_params[key] = os.path.join(Path.home(), local_params[key])
            if not os.path.exists(local_params[key]):
                os.makedirs(local_params[key])

        # create environment
        self.bp = ContinuousMergingBlueprint(self.params,
                                             number_of_senarios=500,
                                             random_seed=0)
        self.env = SingleAgentRuntime(blueprint=self.bp, render=False)

        # wrapped environment for compatibility with tf2rl
        self.wrapped_env = TF2RLWrapper(self.env)

        # Dummy expert trajectories:
        self.expert_trajs = {
            'obses': np.zeros((10, 16)),
            'next_obses': np.ones((10, 16)),
            'acts': 2 * np.ones((10, 2))
        }

        # create agent and runner:
        self.agent = BehaviorGAILAgent(environment=self.wrapped_env,
                                       params=self.params)
        self.env.ml_behavior = self.agent
        self.runner = GAILRunner(environment=self.wrapped_env,
                                 agent=self.agent,
                                 params=self.params,
                                 expert_trajs=self.expert_trajs)
Example #19
 def test_configurable_blueprint(self):
     params = ParameterServer(
         filename="bark_ml/tests/data/highway_merge_configurable.json")
     # continuous model
     ml_behavior = BehaviorContinuousML(params=params)
     bp = ConfigurableScenarioBlueprint(params=params,
                                        ml_behavior=ml_behavior)
     env = SingleAgentRuntime(blueprint=bp, render=False)
     # agent
     sac_agent = BehaviorSACAgent(environment=env, params=params)
     env.ml_behavior = sac_agent
     # test run
     env.reset()
     for _ in range(0, 5):
         action = np.random.randint(low=0, high=3)
         observed_next_state, reward, done, info = env.step(action)
Example #20
    def _configurable_setup(self, params_filename):
        """Configurable GNN setup depending on a given filename.

        Args:
          params_filename: str, corresponds to path of params file

        Returns:
          params: ParameterServer instance
          observer: GraphObserver instance
          actor: ActorNetwork of BehaviorGraphSACAgent
        """
        params = ParameterServer(filename=params_filename)
        observer = GraphObserver(params=params)
        bp = ContinuousHighwayBlueprint(params,
                                        number_of_senarios=2,
                                        random_seed=0)
        env = SingleAgentRuntime(blueprint=bp, observer=observer, render=False)
        # Get GNN SAC actor net
        sac_agent = BehaviorGraphSACAgent(environment=env,
                                          observer=observer,
                                          params=params)
        actor = sac_agent._agent._actor_network
        return params, observer, actor
Example #21
    def test_behavior_wrapping(self):
        # create scenario
        params = ParameterServer()
        bp = ContinuousHighwayBlueprint(params,
                                        number_of_senarios=10,
                                        random_seed=0)
        env = SingleAgentRuntime(blueprint=bp, render=True)
        ml_behaviors = []
        ml_behaviors.append(BehaviorPPOAgent(environment=env, params=params))
        ml_behaviors.append(BehaviorSACAgent(environment=env, params=params))

        for ml_behavior in ml_behaviors:
            # set agent
            env.ml_behavior = ml_behavior
            env.reset()
            done = False
            while done is False:
                action = np.random.uniform(low=-0.1, high=0.1, size=(2, ))
                observed_next_state, reward, done, info = env.step(action)
                print(
                    f"Observed state: {observed_next_state}, Reward: {reward}, Done: {done}"
                )
Example #22
    def test_behavior_wrapping(self):
        # create scenario
        params = ParameterServer()
        bp = ContinuousHighwayBlueprint(params,
                                        num_scenarios=10,
                                        random_seed=0)
        env = SingleAgentRuntime(blueprint=bp, render=False)
        ml_behaviors = []
        ml_behaviors.append(BehaviorPPOAgent(environment=env, params=params))
        ml_behaviors.append(BehaviorSACAgent(environment=env, params=params))

        for ml_behavior in ml_behaviors:
            # set agent
            env.ml_behavior = ml_behavior
            env.reset()
            done = False
            while done is False:
                action = np.random.uniform(low=-0.1, high=0.1, size=(2, ))
                observed_next_state, reward, done, info = env.step(action)
                print(
                    f"Observed state: {observed_next_state}, Reward: {reward}, Done: {done}"
                )

            # action is set externally
            ml_behavior._set_action_externally = True
            agent_id = list(env._world.agents.keys())[0]
            observed_world = env._world.Observe([agent_id])[0]
            action = np.random.uniform(low=-0.1, high=0.1, size=(2, ))
            ml_behavior.ActionToBehavior(action)
            a = ml_behavior.Plan(0.2, observed_world)
            action = np.random.uniform(low=-0.1, high=0.1, size=(2, ))
            ml_behavior.ActionToBehavior(action)
            b = ml_behavior.Plan(0.2, observed_world)
            self.assertEqual(np.any(np.not_equal(a, b)), True)

            # action will be calculated within the Plan(..) fct.
            a = ml_behavior.Plan(0.2, observed_world)
            b = ml_behavior.Plan(0.2, observed_world)
            np.testing.assert_array_equal(a, b)
Example #23
from bark_ml.observers.nearest_state_observer import NearestAgentsObserver
from bark_ml.environments.blueprints import DiscreteMergingBlueprint

# create scenario
if not os.path.exists("examples"):
    logging.info("changing directory")
    os.chdir("diadem_dqn.runfiles/bark_ml")

bark_params = ParameterServer(
    filename="examples/example_params/diadem_params.json")
bp = DiscreteMergingBlueprint(bark_params,
                              number_of_senarios=100,
                              random_seed=0)

observer = NearestAgentsObserver(bark_params)
runtime = SingleAgentRuntime(blueprint=bp, observer=observer, render=True)


def run_dqn_algorithm(parameter_files):
    exp_dir = "tmp_exp_dir"
    diadem_params = Params(filename=parameter_files)
    config_logging(console=True)
    environment = DiademBarkEnvironment(runtime=runtime)
    context = AgentContext(environment=environment,
                           datamanager=None,
                           preprocessor=None,
                           optimizer=tf.train.AdamOptimizer,
                           summary_service=PandasSummary())
    agent = AgentManager(params=diadem_params, context=context)

    exp = Experiment(params=diadem_params['experiment'],
Example #24
 def __init__(self, *args, **kwargs):
     SingleAgentRuntime.__init__(self, *args, **kwargs)
Example #25
 def __init__(self, params=ParameterServer(), render=False):
     discrete_merging_bp = DiscreteIntersectionBlueprint(params)
     SingleAgentRuntime.__init__(self,
                                 blueprint=discrete_merging_bp,
                                 render=render)
Example #26
 def __init__(self):
     params = ParameterServer()
     cont_merging_bp = ContinuousIntersectionBlueprint(params)
     SingleAgentRuntime.__init__(self,
                                 blueprint=cont_merging_bp,
                                 render=True)
Example #27
 def __init__(self, params=ParameterServer(), render=False):
     discrete_highway_bp = DiscreteHighwayBlueprint(params)
     SingleAgentRuntime.__init__(self,
                                 blueprint=discrete_highway_bp,
                                 render=render)
Example #28
class PyGraphObserverTests(unittest.TestCase):
  """Observer tests"""

  def _get_observation(self, observer, world, eval_id):
    observed_world = world.Observe([eval_id])[0]
    observation = observer.Observe(observed_world)
    return observation, observed_world

  def setUp(self):
    """Setting up the test-case."""
    params = ParameterServer()
    bp = ContinuousHighwayBlueprint(params, random_seed=0)
    self.env = SingleAgentRuntime(blueprint=bp, render=False)
    self.env.reset()
    self.world = self.env._world
    self.observer = GraphObserver(params)
    self.eval_id = self.env._scenario._eval_agent_ids[0]


  def test_parameter_server_usage(self):
    expected_num_agents = 15
    expected_visibility_radius = 100

    params = ParameterServer()
    params["ML"]["GraphObserver"]["AgentLimit"] = expected_num_agents
    params["ML"]["GraphObserver"]["VisibilityRadius"] = expected_visibility_radius
    params["ML"]["GraphObserver"]["NormalizationEnabled"] = True
    observer = GraphObserver(params=params)

    self.assertEqual(observer._num_agents, expected_num_agents)
    self.assertEqual(observer._visibility_radius, expected_visibility_radius)
    # self.assertTrue(observer._add_self_loops)
    self.assertTrue(observer._normalize_observations)

  def test_request_subset_of_available_node_features(self):
    params = ParameterServer()

    requested_features = GraphObserver.available_node_attributes()[0:5]
    params["ML"]["GraphObserver"]["EnabledNodeFeatures"] = requested_features
    observer = GraphObserver(params=params)

    self.assertEqual(
      observer._enabled_node_attribute_keys,
      requested_features)

  def test_request_subset_of_available_edge_features(self):
    params = ParameterServer()

    requested_features = GraphObserver.available_edge_attributes()[0:2]
    params["ML"]["GraphObserver"]["EnabledEdgeFeatures"] = requested_features
    observer = GraphObserver(params=params)

    self.assertEqual(
      observer._enabled_edge_attribute_keys,
      requested_features)

  def test_request_partially_invalid_node_features(self):
    params = ParameterServer()

    requested_features =\
      GraphObserver.available_node_attributes()[0:5] + ['invalid']
    params["ML"]["GraphObserver"]["EnabledNodeFeatures"] = requested_features
    observer = GraphObserver(params=params)

    # remove invalid feature from expected list
    requested_features.pop(-1)

    self.assertEqual(
      observer._enabled_node_attribute_keys,
      requested_features)

  def test_request_partially_invalid_edge_features(self):
    params = ParameterServer()

    requested_features =\
      GraphObserver.available_edge_attributes()[0:2] + ['invalid']
    params["ML"]["GraphObserver"]["EnabledEdgeFeatures"] = requested_features
    observer = GraphObserver(params=params)

    # remove invalid feature from expected list
    requested_features.pop(-1)

    self.assertEqual(
      observer._enabled_edge_attribute_keys,
      requested_features)

  def test_observe_with_self_loops(self):
    num_agents = 4
    params = ParameterServer()
    params["ML"]["GraphObserver"]["AgentLimit"] = num_agents
    params["ML"]["GraphObserver"]["SelfLoops"] = True
    observer = GraphObserver(params=params)
    obs, _ = self._get_observation(observer, self.world, self.eval_id)
    obs = tf.expand_dims(obs, 0) # add a batch dimension

    _, adjacency, _ = GraphObserver.graph(obs, graph_dims=observer.graph_dimensions)
    adjacency_list_diagonal = (tf.linalg.tensor_diag_part(adjacency[0]))

    # assert ones on the diagonal of the adjacency matrix
    tf.assert_equal(adjacency_list_diagonal, tf.ones(num_agents))

  def test_observe_without_self_loops(self):
    num_agents = 4
    params = ParameterServer()
    params["ML"]["GraphObserver"]["AgentLimit"] = num_agents
    params["ML"]["GraphObserver"]["SelfLoops"] = False
    observer = GraphObserver(params=params)
    obs, _ = self._get_observation(observer, self.world, self.eval_id)
    obs = tf.expand_dims(obs, 0) # add a batch dimension

    _, adjacency, _ = GraphObserver.graph(obs, graph_dims=observer.graph_dimensions)
    adjacency_list_diagonal = (tf.linalg.tensor_diag_part(adjacency[0]))

    # assert zeros on the diagonal of the adjacency matrix
    tf.assert_equal(adjacency_list_diagonal, tf.zeros(num_agents))

  def test_observation_conforms_to_spec(self):
    """
    Verify that the observation returned by the observer
    is valid with respect to its defined observation space.
    """
    num_agents = 4
    params = ParameterServer()
    params["ML"]["GraphObserver"]["AgentLimit"] = num_agents
    observer = GraphObserver(params=params)
    obs, _ = self._get_observation(observer, self.world, self.eval_id)

    self.assertTrue(observer.observation_space.contains(obs))

    # additionally check that the adjacency list is binary, since
    # this can't be enforced by the observation space currently
    adj_start_idx = num_agents * observer.feature_len
    adj_end_idx = adj_start_idx + num_agents ** 2
    adj_list = obs[adj_start_idx : adj_end_idx]

    for element in adj_list: self.assertIn(element, [0, 1])

  def test_observed_agents_selection(self):
    agent_limit = 10
    params = ParameterServer()
    params["ML"]["GraphObserver"]["AgentLimit"] = agent_limit
    observer = GraphObserver(params=params)

    obs, obs_world = self._get_observation(
      observer=observer,
      world=self.world,
      eval_id=self.eval_id)

    obs = tf.expand_dims(obs, 0) # add a batch dimension

    nodes, _, _ = GraphObserver.graph(obs, graph_dims=observer.graph_dimensions)
    nodes = nodes[0] # remove batch dim

    ego_node = nodes[0]
    ego_node_pos = Point2d(
      ego_node[0].numpy(), # x coordinate
      ego_node[1].numpy()) # y coordinate

    # verify that the nodes are ordered by
    # ascending distance to the ego node
    max_distance_to_ego = 0
    for node in nodes:
      pos = Point2d(
        node[0].numpy(), # x coordinate
        node[1].numpy()) # y coordinate
      distance_to_ego = Distance(pos, ego_node_pos)

      self.assertGreaterEqual(distance_to_ego, max_distance_to_ego,
        msg='Nodes are not sorted by distance relative to '\
          + 'the ego node in ascending order.')

      max_distance_to_ego = distance_to_ego

  def test_observation_to_graph_conversion(self):
    params = ParameterServer()
    params["ML"]["GraphObserver"]["SelfLoops"] = False
    graph_observer = GraphObserver(params=params)

    num_nodes = 5
    num_features = 5
    num_edge_features = 4

    node_features = np.random.random_sample((num_nodes, num_features))
    edge_features = np.random.random_sample((num_nodes, num_nodes, num_edge_features))

    # note that edges are bidirectional, so the
    # adjacency matrix is symmetric
    adjacency_list = [
      [0, 1, 1, 1, 0], # 1 connects with 2, 3, 4
      [1, 0, 1, 1, 0], # 2 additionally connects with 3, 4
      [1, 1, 0, 1, 0], # 3 additionally connects with 4
      [1, 1, 1, 0, 0], # 4 has no additional links
      [0, 0, 0, 0, 0]  # empty slot -> all zeros
    ]

    observation = np.array(node_features)
    observation = np.append(observation, adjacency_list)
    observation = np.append(observation, edge_features)
    observation = observation.reshape(-1)
    observations = np.array([observation, observation])

    self.assertEqual(observations.shape, (2, 150))

    expected_nodes = tf.constant([node_features, node_features])
    expected_edge_features = tf.constant([edge_features, edge_features])

    graph_dims = (num_nodes, num_features, num_edge_features)
    nodes, edges, edge_features = graph_observer.graph(observations, graph_dims)

    self.assertTrue(tf.reduce_all(tf.equal(nodes, expected_nodes)))
    self.assertTrue(tf.reduce_all(tf.equal(edge_features, expected_edge_features)))

    observations = np.array([observation, observation, observation])

    # in dense mode, the nodes of all graphs are in a single list
    expected_nodes = tf.constant([node_features, node_features, node_features])
    expected_nodes = tf.reshape(expected_nodes, [-1, num_features])

    # the edges encoded in the adjacency list above
    expected_dense_edges = tf.constant([
      # graph 1
      [0, 1], [0, 2], [0, 3],
      [1, 0], [1, 2], [1, 3],
      [2, 0], [2, 1], [2, 3],
      [3, 0], [3, 1], [3, 2],
      # graph 2
      [5, 6], [5, 7], [5, 8],
      [6, 5], [6, 7], [6, 8],
      [7, 5], [7, 6], [7, 8],
      [8, 5], [8, 6], [8, 7],
      # graph 3
      [10, 11], [10, 12], [10, 13],
      [11, 10], [11, 12], [11, 13],
      [12, 10], [12, 11], [12, 13],
      [13, 10], [13, 11], [13, 12]
    ], dtype=tf.int32)

    expected_node_to_graph_map = tf.constant([
      0, 0, 0, 0, 0,
      1, 1, 1, 1, 1,
      2, 2, 2, 2, 2
    ])

    observations = tf.convert_to_tensor(observations)
    print(observations)
    nodes, edges, node_to_graph_map, E =\
      GraphObserver.graph(observations, graph_dims, dense=True)

    self.assertTrue(tf.reduce_all(tf.equal(nodes, expected_nodes)))
    self.assertTrue(tf.reduce_all(tf.equal(edges, expected_dense_edges)))
    # self.assertTrue(tf.reduce_all(
    #   tf.equal(node_to_graph_map, expected_node_to_graph_map)))

  def test_agent_pruning(self):
    """
    Verify that the observer correctly handles the case where
    there are less agents in the world than set as the limit.
    tl;dr: check that all entries of the node features,
    adjacency matrix, and edge features not corresponding to
    actually existing agents are zeros.
    """
    num_agents = 25
    params = ParameterServer()
    params["ML"]["GraphObserver"]["AgentLimit"] = num_agents
    observer = GraphObserver(params=params)
    obs, world = self._get_observation(observer, self.world, self.eval_id)
    obs = tf.expand_dims(obs, 0) # add a batch dimension

    nodes, adjacency_matrix, edge_features = GraphObserver.graph(
      observations=obs,
      graph_dims=observer.graph_dimensions)

    self.assertEqual(nodes.shape, [1, num_agents, observer.feature_len])

    expected_num_agents = len(world.agents)

    # nodes that do not represent agents, but are contained
    # to fill up the required observation space.
    expected_n_fill_up_nodes = num_agents - expected_num_agents
    fill_up_nodes = nodes[0, expected_num_agents:]

    self.assertEqual(
      fill_up_nodes.shape,
      [expected_n_fill_up_nodes, observer.feature_len])

    # verify that entries for non-existing agents are all zeros
    self.assertEqual(tf.reduce_sum(fill_up_nodes), 0)

    # the equivalent for edges: verify that for each zero entry
    # in the adjacency matrix, the corresponding edge feature
    # vector is a zero vector of correct length.
    zero_indices = tf.where(tf.equal(adjacency_matrix, 0))
    fill_up_edge_features = tf.gather_nd(edge_features, zero_indices)
    edge_feature_len = observer.graph_dimensions[2]
    zero_edge_feature_vectors = tf.zeros(
      [zero_indices.shape[0], edge_feature_len])

    self.assertTrue(tf.reduce_all(tf.equal(
      fill_up_edge_features,
      zero_edge_feature_vectors)))
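The tests above rely on the GraphObserver's flat observation layout: node features first, then the row-major adjacency matrix, then the edge features (see test_observation_conforms_to_spec and test_observation_to_graph_conversion). A small helper sketch for slicing a single observation along those boundaries (the helper itself is an illustrative assumption, not part of bark-ml):

import numpy as np

def split_graph_observation(obs, num_agents, feature_len, edge_feature_len):
  """Splits a flat GraphObserver observation into nodes, adjacency, and edge features."""
  node_end = num_agents * feature_len
  adj_end = node_end + num_agents ** 2
  nodes = np.reshape(obs[:node_end], (num_agents, feature_len))
  adjacency = np.reshape(obs[node_end:adj_end], (num_agents, num_agents))
  edge_features = np.reshape(
    obs[adj_end:], (num_agents, num_agents, edge_feature_len))
  return nodes, adjacency, edge_features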
Example #29
 def __init__(self):
     params = ParameterServer()
     cont_highway_bp = ContinuousHighwayBlueprint(params)
     SingleAgentRuntime.__init__(self,
                                 blueprint=cont_highway_bp,
                                 render=True)
Example #30
params = ParameterServer()
# a continuous blueprint matches the continuous actions sampled below
bp = ContinuousHighwayBlueprint(params)
# bp = DiscreteHighwayBlueprint(params,
#                               number_of_senarios=10,
#                               random_seed=0)


# arguments that are additionally set in the runtime
# overwrite the ones of the blueprint
# e.g. we can change observer to the cpp observer
observer = NearestObserver(params)
# viewer = MPViewer(params=params,
#                   x_range=[-35, 35],
#                   y_range=[-35, 35],
#                   follow_agent_id=True)
# viewer = VideoRenderer(renderer=viewer,
#                        world_step_time=0.2,
#                        fig_path="/Users/hart/2020/bark-ml/video/")
env = SingleAgentRuntime(blueprint=bp,
                         observer=observer,
                         render=True)

# gym interface
env.reset()
done = False
while done is False:
  action = np.random.uniform(
    low=np.array([-0.5, -0.02]), high=np.array([0.5, 0.02]), size=(2, ))
  observed_next_state, reward, done, info = env.step(action)
  print(f"Observed state: {observed_next_state}, Action: {action}, Reward: {reward}, Done: {done}")

# viewer.export_video(
#   filename="/Users/hart/2020/bark-ml/video/video", remove_image_dir=False)
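Instead of hard-coding the bounds, the random action could also be drawn from the runtime's own action space; a short sketch, assuming the gym-style action_space exposed by SingleAgentRuntime supports sample():

env.reset()
done = False
while not done:
  action = env.action_space.sample()
  observed_next_state, reward, done, info = env.step(action)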