Example No. 1
 def test_general_evaluator(self):
   params = ParameterServer()
   bp = ContinuousSingleLaneBlueprint(params)
   env = SingleAgentRuntime(blueprint=bp, render=True)
   evaluator = GeneralEvaluator(params)
   env._evaluator = evaluator
   env.reset()
   for _ in range(0, 4):
      state, reward, terminal, info = env.step(np.array([0., 0.]))
     print(terminal, reward)
Example No. 2
    def test_behavior_wrapping(self):
        # create scenario
        params = ParameterServer()
        bp = DiscreteHighwayBlueprint(params,
                                      number_of_senarios=10,
                                      random_seed=0)
        env = SingleAgentRuntime(blueprint=bp, render=False)
        #env = gym.make("highway-v1", params=params)
        ml_behaviors = []
        # ml_behaviors.append(IQNAgent(env=env, test_env=env, params=params))
        ml_behaviors.append(FQFAgent(env=env, params=params))
        # ml_behaviors.append(QRDQNAgent(env=env, test_env=env, params=params))

        for ml_behavior in ml_behaviors:
            # set agent
            env.ml_behavior = ml_behavior
            env.reset()
            action = np.random.randint(low=0, high=env.action_space.n)
            observed_next_state, reward, done, info = env.step(action)
            print(
                f"Observed state: {observed_next_state}, Reward: {reward}, Done: {done}"
            )

            # action is set externally
            ml_behavior._set_action_externally = True
            agent_id = list(env._world.agents.keys())[0]
            observed_world = env._world.Observe([agent_id])[0]

            # do a random action and plan trajectory
            action = np.random.randint(low=1, high=env.action_space.n)
            ml_behavior.ActionToBehavior(action)
            a = ml_behavior.Plan(0.2, observed_world)

            # sample another different random action
            another_action = action
            while another_action == action:
                another_action = np.random.randint(low=1,
                                                   high=env.action_space.n)

            # plan trajectory for the another action
            ml_behavior.ActionToBehavior(another_action)
            b = ml_behavior.Plan(0.2, observed_world)

            # the trajectories generated by two different actions should be different
            self.assertEqual(np.any(np.not_equal(a, b)), True)

            # action will be calculated within the Plan(..) fct.
            ml_behavior._set_action_externally = False
            a = ml_behavior.Plan(0.2, observed_world)
            b = ml_behavior.Plan(0.2, observed_world)
            last_action = ml_behavior.GetLastAction()
            self.assertTrue(isinstance(last_action, float))

            # same trajectory for same state
            np.testing.assert_array_equal(a, b)
Example No. 3
  def test_envs_discrete_rl(self):
    params = ParameterServer()
    discrete_blueprints = []
    discrete_blueprints.append(DiscreteHighwayBlueprint(params))
    discrete_blueprints.append(DiscreteMergingBlueprint(params))

    for bp in discrete_blueprints:
      env = SingleAgentRuntime(blueprint=bp, render=False)
      env.reset()
      for _ in range(0, 10):
        action = np.random.randint(low=0, high=3)
        observed_next_state, reward, done, info = env.step(action)
Example No. 4
  def test_envs_cont_rl(self):
    params = ParameterServer()
    cont_blueprints = []
    cont_blueprints.append(ContinuousHighwayBlueprint(params))
    cont_blueprints.append(ContinuousMergingBlueprint(params))

    for bp in cont_blueprints:
      env = SingleAgentRuntime(blueprint=bp, render=False)
      env.reset()
      for _ in range(0, 10):
        action = np.random.uniform(low=-0.1, high=0.1, size=(2, ))
        observed_next_state, reward, done, info = env.step(action)
Example No. 5
  def step(self, action):
    """perform the cf evaluation"""
    # simulate counterfactual worlds
    local_tracer = Tracer()
    eval_id = self._scenario._eval_agent_ids[0]
    self.St()
    cf_worlds = self.GenerateCounterfactualWorlds()
    for v in self._cf_axs.values():
      v["count"] = 0
    for i, cf_world in enumerate(cf_worlds):
      cf_key = list(cf_world.keys())[0]
      self.SimulateWorld(
        cf_world[cf_key], local_tracer, N=self._cf_simulation_steps,
        replaced_agent=cf_key, num_virtual_world=i)
    self.Et()

    # NOTE: this world would actually have the predicted traj.
    gt_world = self.ReplaceBehaviorModel()
    self.SimulateWorld(
      gt_world, local_tracer, N=self._cf_simulation_steps,
      replaced_agent="None", num_virtual_world="None")
    # NOTE: outsource
    hist = gt_world.agents[eval_id].history
    traj = np.stack([x[0] for x in hist])
    # self._viewer.drawTrajectory(traj, color='blue')

    if self._visualize_heatmap:
      self.DrawHeatmap(
        local_tracer,
        filename=self._results_folder + "cf_%03d" % self._count + "_heatmap")

    # evaluate counterfactual worlds
    trace = self.TraceCounterfactualWorldStats(local_tracer)
    collision_rate = trace['collision']/len(self._behavior_model_pool)
    print(collision_rate)
    self._logger.info(
      f"The counterfactual worlds have a collision rate of {collision_rate:.3f}.")

    # choose a policy
    executed_learned_policy = 1
    if collision_rate > self._max_col_rate:
      executed_learned_policy = 0
      self._logger.info("Executing fallback model.")
      self._world.agents[eval_id].behavior_model = self._ego_rule_based
    trace["executed_learned_policy"] = executed_learned_policy
    self._tracer.Trace(trace)
    self._count += 1
    for fig in self._cf_axs.values():
      for sub_ax in fig["ax"]:
        sub_ax.clear()
    return SingleAgentRuntime.step(self, action)
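
For reference, the fallback decision in step() above can be isolated into a tiny helper (an illustrative sketch only, not part of the original runtime):

# Illustrative sketch of the policy-selection rule used in step() above.
def use_learned_policy(collision_rate, max_col_rate):
  """Returns True if the learned policy may keep driving, False if the
  rule-based fallback model should take over."""
  return collision_rate <= max_col_rate
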
Example No. 6
 def test_configurable_blueprint(self):
     params = ParameterServer(
         filename="bark_ml/tests/data/highway_merge_configurable.json")
     # continuous model
     ml_behavior = BehaviorContinuousML(params=params)
     bp = ConfigurableScenarioBlueprint(params=params,
                                        ml_behavior=ml_behavior)
     env = SingleAgentRuntime(blueprint=bp, render=False)
     # agent
     sac_agent = BehaviorSACAgent(environment=env, params=params)
     env.ml_behavior = sac_agent
     # test run
     env.reset()
     for _ in range(0, 5):
         action = np.random.randint(low=0, high=3)
         observed_next_state, reward, done, info = env.step(action)
Example No. 7
    def test_behavior_wrapping(self):
        # create scenario
        params = ParameterServer()
        bp = ContinuousHighwayBlueprint(params,
                                        num_scenarios=10,
                                        random_seed=0)
        env = SingleAgentRuntime(blueprint=bp, render=False)
        ml_behaviors = []
        ml_behaviors.append(BehaviorPPOAgent(environment=env, params=params))
        ml_behaviors.append(BehaviorSACAgent(environment=env, params=params))

        for ml_behavior in ml_behaviors:
            # set agent
            env.ml_behavior = ml_behavior
            env.reset()
            done = False
            while not done:
                action = np.random.uniform(low=-0.1, high=0.1, size=(2, ))
                observed_next_state, reward, done, info = env.step(action)
                print(
                    f"Observed state: {observed_next_state}, Reward: {reward}, Done: {done}"
                )

            # action is set externally
            ml_behavior._set_action_externally = True
            agent_id = list(env._world.agents.keys())[0]
            observed_world = env._world.Observe([agent_id])[0]
            action = np.random.uniform(low=-0.1, high=0.1, size=(2, ))
            ml_behavior.ActionToBehavior(action)
            a = ml_behavior.Plan(0.2, observed_world)
            action = np.random.uniform(low=-0.1, high=0.1, size=(2, ))
            ml_behavior.ActionToBehavior(action)
            b = ml_behavior.Plan(0.2, observed_world)
            self.assertEqual(np.any(np.not_equal(a, b)), True)

            # action will be calculated within the Plan(..) fct.
            a = ml_behavior.Plan(0.2, observed_world)
            b = ml_behavior.Plan(0.2, observed_world)
            np.testing.assert_array_equal(a, b)
Example No. 8
    def test_tracer(self):
        params = ParameterServer()
        bp = ContinuousHighwayBlueprint(params)
        tracer = Tracer()
        env = SingleAgentRuntime(blueprint=bp, render=False)
        for i in range(0, 2):
            env.reset()
            for _ in range(0, 10):
                action = np.random.uniform(low=-0.1, high=0.1, size=(2, ))
                data = (observed_next_state, reward, done,
                        info) = env.step(action)
                tracer.Trace(data, num_episode=i)

        # NOTE: test basic tracing
        self.assertEqual(len(tracer._states), 20)
        for i in range(0, 20):
            self.assertEqual("is_terminal" in tracer._states[i].keys(), True)
            self.assertEqual("reward" in tracer._states[i].keys(), True)
            self.assertEqual("collision" in tracer._states[i].keys(), True)
            self.assertEqual("drivable_area" in tracer._states[i].keys(), True)
            self.assertEqual("goal_reached" in tracer._states[i].keys(), True)
            self.assertEqual("step_count" in tracer._states[i].keys(), True)

        # NOTE: test pandas magic
        tracer.ConvertToDf()
        # average collisions
        print(
            tracer.Query(key="collision",
                         group_by="num_episode",
                         agg_type="MEAN").mean())
        # average reward
        print(
            tracer.Query(key="reward", group_by="num_episode",
                         agg_type="SUM").mean())

        # NOTE: test reset
        tracer.Reset()
        self.assertEqual(len(tracer._states), 0)
        self.assertEqual(tracer._df, None)
Example No. 9
    def test_behavior_wrapping(self):
        # create scenario
        params = ParameterServer()
        bp = ContinuousHighwayBlueprint(params,
                                        number_of_senarios=10,
                                        random_seed=0)
        env = SingleAgentRuntime(blueprint=bp, render=True)
        ml_behaviors = []
        ml_behaviors.append(BehaviorPPOAgent(environment=env, params=params))
        ml_behaviors.append(BehaviorSACAgent(environment=env, params=params))

        for ml_behavior in ml_behaviors:
            # set agent
            env.ml_behavior = ml_behavior
            env.reset()
            done = False
            while not done:
                action = np.random.uniform(low=-0.1, high=0.1, size=(2, ))
                observed_next_state, reward, done, info = env.step(action)
                print(
                    f"Observed state: {observed_next_state}, Reward: {reward}, Done: {done}"
                )
Example No. 10
# NOTE: the imports and the continuous blueprint below are assumptions added to
# make this excerpt self-contained; module paths may differ between bark-ml versions.
import numpy as np
from bark.runtime.commons.parameters import ParameterServer
from bark_ml.environments.blueprints import ContinuousHighwayBlueprint, \
  DiscreteHighwayBlueprint
from bark_ml.environments.single_agent_runtime import SingleAgentRuntime
from bark_ml.core.observers import NearestObserver

params = ParameterServer()
bp = ContinuousHighwayBlueprint(params,
                                number_of_senarios=10,
                                random_seed=0)
# bp = DiscreteHighwayBlueprint(params,
#                               number_of_senarios=10,
#                               random_seed=0)


# arguments that are additionally set in the runtime
# overwrite the ones of the blueprint
# e.g. we can change observer to the cpp observer
observer = NearestObserver(params)
# viewer = MPViewer(params=params,
#                   x_range=[-35, 35],
#                   y_range=[-35, 35],
#                   follow_agent_id=True)
# viewer = VideoRenderer(renderer=viewer,
#                        world_step_time=0.2,
#                        fig_path="/Users/hart/2020/bark-ml/video/")
env = SingleAgentRuntime(blueprint=bp,
                         observer=observer,
                         render=True)

# gym interface
env.reset()
done = False
while not done:
  action = np.random.uniform(
    low=np.array([-0.5, -0.02]), high=np.array([0.5, 0.02]), size=(2, ))
  observed_next_state, reward, done, info = env.step(action)
  print(f"Observed state: {observed_next_state}, Action: {action}, Reward: {reward}, Done: {done}")

# viewer.export_video(
#   filename="/Users/hart/2020/bark-ml/video/video", remove_image_dir=False)
Example No. 11
class DataGenerator():
    """DataGenerator generates a dataset for a supervised learning setting.

  Args:
      num_scenarios: An integer specifying the number of scenarios to
                     generate data for.
      dump_dir: A path specifying the location where to save the data
                [default: None -> data will not be saved].
      render: A boolean value indicating whether the scenario runs
              should be rendered while generating data (rendering
              slows the data generation process down a lot).
      params: A `ParameterServer` instance that enables further configuration
              of the observer. Defaults to a new instance.
  """
    def __init__(self,
                 num_scenarios=3,
                 dump_dir=None,
                 render=False,
                 params=ParameterServer()):
        """Inits DataGenerator with the parameters (see class definition)."""
        self._dump_dir = dump_dir
        self._num_scenarios = num_scenarios
        self._params = params
        self._bp = ContinuousHighwayBlueprint(
            self._params, number_of_senarios=self._num_scenarios, random_seed=0)
        self._observer = GraphObserver(params=self._params)
        self._env = SingleAgentRuntime(blueprint=self._bp,
                                       observer=self._observer,
                                       render=render)

    def run_scenarios(self):
        """Run all scenarios sequentially
    
    Generates a dataset by running a scenario at a time.
    
    Returns:
      data_all_scenarios: list sceanrio_data (with scenario_data being
                          a list of datapoints (see definition of datapoint
                          in _run_scenario))"""
        data_all_scenarios = list()
        for _ in range(0, self._num_scenarios):
            # Generate Scenario
            scenario, idx = self._bp._scenario_generation.get_next_scenario()

            # Log progress of scenario generation in 20% steps
            part = max(1, int(self._num_scenarios / 5))
            if idx % part == 0:
                msg = "Running data_generation on scenario "+ str(idx+1) + \
                  "/"+ str(self._num_scenarios)
                logging.info(msg)

            data_scenario = self._run_scenario(scenario)
            # Save the data for this run in a separate file and append it to the dataset
            self._save_data(data_scenario)
            data_all_scenarios.append(data_scenario)

        return data_all_scenarios

    def _run_scenario(self, scenario):
        """Runs a specific scenario for a predefined number of steps.

    Args:
      scenario: bark-scenario
        
    Returns:
      scenario_data: list containing all data_points of the scenario run
                     (one datapoint is a Dict with entries "observation"
                     and "label". The observation is a Tensor, the label is a
                     OrderedDict with entries "steering" and "acceleration" for
                     the ego node)"""

        scenario_data = list()
        # Set boundaries for random actions
        low = np.array([-0.5, -0.02])
        high = np.array([0.5, 0.02])

        self._env.reset(scenario=scenario)
        done = False

        while not done:
            action = np.random.uniform(low=low, high=high, size=(2, ))
            observation, reward, done, info = self._env.step(action)

            # Calculate the labels for the supervised setting
            action_labels = self._calc_labels(observation)

            # Save datum in data_scenario
            datum = dict()
            datum["observation"] = observation
            datum["label"] = action_labels
            scenario_data.append(datum)

        return scenario_data

    def _calc_labels(self, observation):
        """Calculates the perfect action labels for one observation

    Args:
      observation: An observation coming from the GraphObserver
    
    Returns:
      action_labels: An OrderedDict containing the perfect action values
                     (steering, acceleration) for the ego agent.

    Raises:
      KeyError: A required node attribute is not present in the
                observation.
    """
        # Get relevant meta data from the observer
        attribute_keys = self._observer._enabled_node_attribute_keys
        norm_data = self._observer.normalization_data
        normalize = self._observer._normalize_observations
        feature_len = self._observer.feature_len

        # Extract features of ego node
        node_features = observation[:feature_len]

        # Extract relevant features for further calculation
        goal_theta = node_features[attribute_keys.index("goal_theta")].numpy()
        theta = node_features[attribute_keys.index("theta")].numpy()
        vel = node_features[attribute_keys.index("vel")].numpy()
        goal_vel = node_features[attribute_keys.index("goal_vel")].numpy()
        goal_d = node_features[attribute_keys.index("goal_d")].numpy()

        # Denormalize features if they were normalized in the first place
        if normalize:
            goal_theta = self._denormalize_value(goal_theta,
                                                 norm_data["theta"])
            theta = self._denormalize_value(theta, norm_data["theta"])
            vel = self._denormalize_value(vel, norm_data["vel"])
            goal_vel = self._denormalize_value(goal_vel, norm_data["vel"])
            goal_d = self._denormalize_value(goal_d, norm_data["distance"])

        # Calculate "perfect" steering
        steering = goal_theta - theta

        # Calculate "perfect" acceleration
        d_vel = goal_vel - vel
        # Equation derived from:
        #   d_goal = 0.5*acc*t² + v_0*t
        #   d_vel = acc*t
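        #   With t = d_vel / acc (and v_0 = vel, d_goal = goal_d), substituting gives
        #   goal_d = d_vel**2 / (2 * acc) + vel * d_vel / acc
        #   => acc = (1 / goal_d) * d_vel * (d_vel / 2 + vel)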
        acc = (1. / goal_d) * d_vel * (d_vel / 2 + vel)

        # Normalize labels if the node features were normalized at the beginning
        if normalize:
            # Manually defined ranges used as normalization data for steering and acc
            range_steering = [-0.1, 0.1]
            range_acc = [-0.6, 0.6]
            steering = self._normalize_value(steering, range_steering)
            acc = self._normalize_value(acc, range_acc)

        action_labels = OrderedDict()
        action_labels["steering"] = steering
        action_labels["acceleration"] = acc

        return action_labels

    def _normalize_value(self, value, range):
        """Normalization of value to range

    If the `value` is outside the given range, it's clamped 
    to the bound of [-1, 1]
    """
        normed = 2 * (value - range[0]) / (range[1] - range[0]) - 1
        normed = max(-1, normed)  # values below -1 are clipped
        normed = min(1, normed)  # values above 1 are clipped
        return normed

    def _denormalize_value(self, normed, range):
        """Inverse function of a normalization

    Reconstructs the original value from a given normed value and a range.
    The reconstruction is lossy if the value was clipped during
    normalization. The expected range of the normalized value is [-1, 1].
    
    Args:
      normed: Double or float
      range: List containing lower and upper bound

    Returns:
      value: Double or float"""

        value = 0.5 * (normed + 1) * (range[1] - range[0]) + range[0]
        return value

    def _save_data(self, data):
        """Save the data in a prespecified directory

    Args:
      data: the data that needs to be saved
    """
        if self._dump_dir is not None:
            # Check if path is existent or generate directory
            if not os.path.exists(self._dump_dir):
                os.makedirs(self._dump_dir)
            path = self._dump_dir + '/dataset_' + str(int(
                time.time() * 10000)) + '.pickle'
            with open(path, 'wb') as handle:
                pickle.dump(data, handle, protocol=pickle.HIGHEST_PROTOCOL)
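
A minimal usage sketch for the DataGenerator above (the import path and the dump directory are assumptions and may need adjusting to the local bark-ml setup):

# Hypothetical usage of DataGenerator; the import path and dump_dir are assumptions.
from bark.runtime.commons.parameters import ParameterServer

params = ParameterServer()
generator = DataGenerator(num_scenarios=3,
                          dump_dir="/tmp/bark_ml_dataset",  # illustrative path
                          render=False,
                          params=params)
data_all_scenarios = generator.run_scenarios()
# one entry per scenario, each a list of {"observation": ..., "label": ...} datapoints
print(len(data_all_scenarios))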