def test_general_evaluator(self):
  params = ParameterServer()
  bp = ContinuousSingleLaneBlueprint(params)
  env = SingleAgentRuntime(blueprint=bp, render=True)
  evaluator = GeneralEvaluator(params)
  env._evaluator = evaluator
  env.reset()
  for _ in range(0, 4):
    # step returns (state, reward, terminal, info) as in the gym interface
    state, reward, terminal, info = env.step(np.array([0., 0.]))
    print(terminal, reward)
def test_behavior_wrapping(self):
  # create scenario
  params = ParameterServer()
  bp = DiscreteHighwayBlueprint(params, number_of_senarios=10, random_seed=0)
  env = SingleAgentRuntime(blueprint=bp, render=False)
  # env = gym.make("highway-v1", params=params)
  ml_behaviors = []
  # ml_behaviors.append(IQNAgent(env=env, test_env=env, params=params))
  ml_behaviors.append(FQFAgent(env=env, params=params))
  # ml_behaviors.append(QRDQNAgent(env=env, test_env=env, params=params))
  for ml_behavior in ml_behaviors:
    # set agent
    env.ml_behavior = ml_behavior
    env.reset()
    action = np.random.randint(low=0, high=env.action_space.n)
    observed_next_state, reward, done, info = env.step(action)
    print(
      f"Observed state: {observed_next_state}, Reward: {reward}, Done: {done}")

    # the action is set externally
    ml_behavior._set_action_externally = True
    agent_id = list(env._world.agents.keys())[0]
    observed_world = env._world.Observe([agent_id])[0]

    # take a random action and plan the trajectory
    action = np.random.randint(low=1, high=env.action_space.n)
    ml_behavior.ActionToBehavior(action)
    a = ml_behavior.Plan(0.2, observed_world)

    # sample a different random action
    another_action = action
    while another_action == action:
      another_action = np.random.randint(low=1, high=env.action_space.n)

    # plan the trajectory for the other action
    ml_behavior.ActionToBehavior(another_action)
    b = ml_behavior.Plan(0.2, observed_world)

    # the trajectories generated by two different actions should differ
    self.assertEqual(np.any(np.not_equal(a, b)), True)

    # the action is calculated within the Plan(..) function
    ml_behavior._set_action_externally = False
    a = ml_behavior.Plan(0.2, observed_world)
    b = ml_behavior.Plan(0.2, observed_world)
    last_action = ml_behavior.GetLastAction()
    self.assertTrue(isinstance(last_action, float))
    # the same state should yield the same trajectory
    np.testing.assert_array_equal(a, b)
def test_envs_discrete_rl(self):
  params = ParameterServer()
  discrete_blueprints = []
  discrete_blueprints.append(DiscreteHighwayBlueprint(params))
  discrete_blueprints.append(DiscreteMergingBlueprint(params))
  for bp in discrete_blueprints:
    env = SingleAgentRuntime(blueprint=bp, render=False)
    env.reset()
    for _ in range(0, 10):
      action = np.random.randint(low=0, high=3)
      observed_next_state, reward, done, info = env.step(action)
def test_envs_cont_rl(self):
  params = ParameterServer()
  cont_blueprints = []
  cont_blueprints.append(ContinuousHighwayBlueprint(params))
  cont_blueprints.append(ContinuousMergingBlueprint(params))
  for bp in cont_blueprints:
    env = SingleAgentRuntime(blueprint=bp, render=False)
    env.reset()
    for _ in range(0, 10):
      action = np.random.uniform(low=-0.1, high=0.1, size=(2, ))
      observed_next_state, reward, done, info = env.step(action)
def step(self, action):
  """Performs the counterfactual evaluation and then steps the environment."""
  # simulate counterfactual worlds
  local_tracer = Tracer()
  eval_id = self._scenario._eval_agent_ids[0]
  self.St()
  cf_worlds = self.GenerateCounterfactualWorlds()
  for v in self._cf_axs.values():
    v["count"] = 0
  for i, cf_world in enumerate(cf_worlds):
    cf_key = list(cf_world.keys())[0]
    self.SimulateWorld(
      cf_world[cf_key], local_tracer, N=self._cf_simulation_steps,
      replaced_agent=cf_key, num_virtual_world=i)
  self.Et()

  # NOTE: this world would actually have the predicted trajectory
  gt_world = self.ReplaceBehaviorModel()
  self.SimulateWorld(
    gt_world, local_tracer, N=self._cf_simulation_steps,
    replaced_agent="None", num_virtual_world="None")

  # NOTE: outsource
  hist = gt_world.agents[eval_id].history
  traj = np.stack([x[0] for x in hist])
  # self._viewer.drawTrajectory(traj, color='blue')

  if self._visualize_heatmap:
    self.DrawHeatmap(
      local_tracer,
      filename=self._results_folder + "cf_%03d" % self._count + "_heatmap")

  # evaluate the counterfactual worlds
  trace = self.TraceCounterfactualWorldStats(local_tracer)
  collision_rate = trace['collision'] / len(self._behavior_model_pool)
  print(collision_rate)
  self._logger.info(
    f"The counterfactual worlds have a collision-rate of {collision_rate:.3f}.")

  # choose a policy: fall back to the rule-based model if the
  # counterfactual collision-rate exceeds the configured maximum
  executed_learned_policy = 1
  if collision_rate > self._max_col_rate:
    executed_learned_policy = 0
    self._logger.info("Executing fallback model.")
    self._world.agents[eval_id].behavior_model = self._ego_rule_based
  trace["executed_learned_policy"] = executed_learned_policy
  self._tracer.Trace(trace)
  self._count += 1
  for fig in self._cf_axs.values():
    for sub_ax in fig["ax"]:
      sub_ax.clear()
  return SingleAgentRuntime.step(self, action)
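# A minimal aggregation sketch, not part of the class above: given a Tracer
# instance such as self._tracer after several step() calls, and assuming (as
# the tracer test further below suggests) that the Tracer keeps one dict per
# traced entry in _states, the fallback frequency could be recovered like so.
def executed_learned_policy_rate(tracer):
  """Fraction of evaluated steps in which the learned policy was kept."""
  flags = [s["executed_learned_policy"] for s in tracer._states
           if "executed_learned_policy" in s]
  return sum(flags) / len(flags) if flags else None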
def test_configurable_blueprint(self):
  params = ParameterServer(
    filename="bark_ml/tests/data/highway_merge_configurable.json")
  # continuous model
  ml_behavior = BehaviorContinuousML(params=params)
  bp = ConfigurableScenarioBlueprint(params=params, ml_behavior=ml_behavior)
  env = SingleAgentRuntime(blueprint=bp, render=False)
  # agent
  sac_agent = BehaviorSACAgent(environment=env, params=params)
  env.ml_behavior = sac_agent
  # test run
  env.reset()
  for _ in range(0, 5):
    action = np.random.randint(low=0, high=3)
    observed_next_state, reward, done, info = env.step(action)
def test_behavior_wrapping(self):
  # create scenario
  params = ParameterServer()
  bp = ContinuousHighwayBlueprint(params, num_scenarios=10, random_seed=0)
  env = SingleAgentRuntime(blueprint=bp, render=False)
  ml_behaviors = []
  ml_behaviors.append(BehaviorPPOAgent(environment=env, params=params))
  ml_behaviors.append(BehaviorSACAgent(environment=env, params=params))
  for ml_behavior in ml_behaviors:
    # set agent
    env.ml_behavior = ml_behavior
    env.reset()
    done = False
    while not done:
      action = np.random.uniform(low=-0.1, high=0.1, size=(2, ))
      observed_next_state, reward, done, info = env.step(action)
      print(
        f"Observed state: {observed_next_state}, Reward: {reward}, Done: {done}")

    # the action is set externally
    ml_behavior._set_action_externally = True
    agent_id = list(env._world.agents.keys())[0]
    observed_world = env._world.Observe([agent_id])[0]
    action = np.random.uniform(low=-0.1, high=0.1, size=(2, ))
    ml_behavior.ActionToBehavior(action)
    a = ml_behavior.Plan(0.2, observed_world)
    action = np.random.uniform(low=-0.1, high=0.1, size=(2, ))
    ml_behavior.ActionToBehavior(action)
    b = ml_behavior.Plan(0.2, observed_world)
    # the trajectories generated by two different actions should differ
    self.assertEqual(np.any(np.not_equal(a, b)), True)

    # the action is calculated within the Plan(..) function
    a = ml_behavior.Plan(0.2, observed_world)
    b = ml_behavior.Plan(0.2, observed_world)
    # the same state should yield the same trajectory
    np.testing.assert_array_equal(a, b)
def test_tracer(self):
  params = ParameterServer()
  bp = ContinuousHighwayBlueprint(params)
  tracer = Tracer()
  env = SingleAgentRuntime(blueprint=bp, render=False)
  for i in range(0, 2):
    env.reset()
    for _ in range(0, 10):
      action = np.random.uniform(low=-0.1, high=0.1, size=(2, ))
      data = (observed_next_state, reward, done, info) = env.step(action)
      tracer.Trace(data, num_episode=i)

  # NOTE: test basic tracing
  self.assertEqual(len(tracer._states), 20)
  for i in range(0, 20):
    self.assertEqual("is_terminal" in tracer._states[i].keys(), True)
    self.assertEqual("reward" in tracer._states[i].keys(), True)
    self.assertEqual("collision" in tracer._states[i].keys(), True)
    self.assertEqual("drivable_area" in tracer._states[i].keys(), True)
    self.assertEqual("goal_reached" in tracer._states[i].keys(), True)
    self.assertEqual("step_count" in tracer._states[i].keys(), True)

  # NOTE: test pandas magic
  tracer.ConvertToDf()
  # average per-episode collision rate
  print(
    tracer.Query(
      key="collision", group_by="num_episode", agg_type="MEAN").mean())
  # average cumulative reward per episode
  print(
    tracer.Query(
      key="reward", group_by="num_episode", agg_type="SUM").mean())

  # NOTE: test reset
  tracer.Reset()
  self.assertEqual(len(tracer._states), 0)
  self.assertEqual(tracer._df, None)
def test_behavior_wrapping(self):
  # create scenario
  params = ParameterServer()
  bp = ContinuousHighwayBlueprint(params, number_of_senarios=10, random_seed=0)
  env = SingleAgentRuntime(blueprint=bp, render=True)
  ml_behaviors = []
  ml_behaviors.append(BehaviorPPOAgent(environment=env, params=params))
  ml_behaviors.append(BehaviorSACAgent(environment=env, params=params))
  for ml_behavior in ml_behaviors:
    # set agent
    env.ml_behavior = ml_behavior
    env.reset()
    done = False
    while not done:
      action = np.random.uniform(low=-0.1, high=0.1, size=(2, ))
      observed_next_state, reward, done, info = env.step(action)
      print(
        f"Observed state: {observed_next_state}, Reward: {reward}, Done: {done}")
# bp = DiscreteHighwayBlueprint(params,
#                               number_of_senarios=10,
#                               random_seed=0)

# arguments that are additionally set in the runtime overwrite the ones
# of the blueprint; e.g., we can change the observer to the C++ observer
observer = NearestObserver(params)

# viewer = MPViewer(params=params,
#                   x_range=[-35, 35],
#                   y_range=[-35, 35],
#                   follow_agent_id=True)
# viewer = VideoRenderer(renderer=viewer,
#                        world_step_time=0.2,
#                        fig_path="/Users/hart/2020/bark-ml/video/")
env = SingleAgentRuntime(blueprint=bp, observer=observer, render=True)

# gym interface
env.reset()
done = False
while not done:
  action = np.random.uniform(
    low=np.array([-0.5, -0.02]), high=np.array([0.5, 0.02]), size=(2, ))
  observed_next_state, reward, done, info = env.step(action)
  print(f"Observed state: {observed_next_state}, Action: {action}, "
        f"Reward: {reward}, Done: {done}")

# viewer.export_video(
#   filename="/Users/hart/2020/bark-ml/video/video", remove_image_dir=False)
class DataGenerator():
  """DataGenerator generates a dataset for a supervised learning setting.

  Args:
    num_scenarios: An integer specifying the number of scenarios to
      generate data for.
    dump_dir: A path specifying the location where to save the data
      [default: None -> data will not be saved].
    render: A boolean value indicating whether the scenario runs should be
      rendered while generating data (rendering slows the data generation
      process down considerably).
    params: A `ParameterServer` instance that enables further configuration
      of the observer. Defaults to a new instance.
  """

  def __init__(self, num_scenarios=3, dump_dir=None, render=False,
               params=ParameterServer()):
    """Inits DataGenerator with the parameters (see class definition)."""
    self._dump_dir = dump_dir
    self._num_scenarios = num_scenarios
    self._params = params
    self._bp = ContinuousHighwayBlueprint(
      self._params, number_of_senarios=self._num_scenarios, random_seed=0)
    self._observer = GraphObserver(params=self._params)
    self._env = SingleAgentRuntime(
      blueprint=self._bp, observer=self._observer, render=render)

  def run_scenarios(self):
    """Runs all scenarios sequentially.

    Generates a dataset by running one scenario at a time.

    Returns:
      data_all_scenarios: A list of scenario_data entries (with
        scenario_data being a list of datapoints; see the definition of a
        datapoint in _run_scenario).
    """
    data_all_scenarios = list()
    for _ in range(0, self._num_scenarios):
      # generate a scenario
      scenario, idx = self._bp._scenario_generation.get_next_scenario()

      # log the progress of the scenario generation in 20% steps
      part = max(1, int(self._num_scenarios / 5))
      if idx % part == 0:
        msg = "Running data_generation on scenario " + str(idx + 1) + \
          "/" + str(self._num_scenarios)
        logging.info(msg)

      data_scenario = self._run_scenario(scenario)
      # save the data of this run in a separate file and append it to the dataset
      self._save_data(data_scenario)
      data_all_scenarios.append(data_scenario)
    return data_all_scenarios

  def _run_scenario(self, scenario):
    """Runs a specific scenario for a predefined number of steps.

    Args:
      scenario: A BARK scenario.

    Returns:
      scenario_data: A list containing all datapoints of the scenario run
        (one datapoint is a dict with the entries "observation" and "label";
        the observation is a Tensor, the label an OrderedDict with the
        entries "steering" and "acceleration" for the ego node).
    """
    scenario_data = list()
    # set boundaries for the random actions
    low = np.array([-0.5, -0.02])
    high = np.array([0.5, 0.02])

    self._env.reset(scenario=scenario)
    done = False
    while not done:
      action = np.random.uniform(low=low, high=high, size=(2, ))
      observation, reward, done, info = self._env.step(action)
      # calculate the labels for the supervised setting
      action_labels = self._calc_labels(observation)

      # save the datum in scenario_data
      datum = dict()
      datum["observation"] = observation
      datum["label"] = action_labels
      scenario_data.append(datum)
    return scenario_data

  def _calc_labels(self, observation):
    """Calculates the "perfect" action labels for one observation.

    Args:
      observation: An observation coming from the GraphObserver.

    Returns:
      action_labels: An OrderedDict containing the "perfect" action values
        (steering, acceleration) for the ego agent.

    Raises:
      KeyError: A needed node attribute is not present in the observation.
    """
    # get the relevant metadata from the observer
    attribute_keys = self._observer._enabled_node_attribute_keys
    norm_data = self._observer.normalization_data
    normalize = self._observer._normalize_observations
    feature_len = self._observer.feature_len

    # extract the features of the ego node
    node_features = observation[:feature_len]

    # extract the features relevant for the further calculation
    goal_theta = node_features[attribute_keys.index("goal_theta")].numpy()
    theta = node_features[attribute_keys.index("theta")].numpy()
    vel = node_features[attribute_keys.index("vel")].numpy()
    goal_vel = node_features[attribute_keys.index("goal_vel")].numpy()
    goal_d = node_features[attribute_keys.index("goal_d")].numpy()

    # denormalize the features if they were normalized in the first place
    if normalize:
      goal_theta = self._denormalize_value(goal_theta, norm_data["theta"])
      theta = self._denormalize_value(theta, norm_data["theta"])
      vel = self._denormalize_value(vel, norm_data["vel"])
      goal_vel = self._denormalize_value(goal_vel, norm_data["vel"])
      goal_d = self._denormalize_value(goal_d, norm_data["distance"])

    # calculate the "perfect" steering
    steering = goal_theta - theta

    # calculate the "perfect" acceleration; derived from the constant
    # acceleration equations
    #   d_goal = 0.5*acc*t^2 + v_0*t
    #   d_vel  = acc*t
    # by eliminating t = d_vel/acc, which yields
    #   acc = (d_vel/d_goal)*(d_vel/2 + v_0)
    d_vel = goal_vel - vel
    acc = (1. / goal_d) * d_vel * (d_vel / 2 + vel)

    # normalize the labels if the node features were normalized at the beginning
    if normalize:
      # manually defined ranges for steering and acc to normalize the data
      range_steering = [-0.1, 0.1]
      range_acc = [-0.6, 0.6]
      steering = self._normalize_value(steering, range_steering)
      acc = self._normalize_value(acc, range_acc)

    action_labels = OrderedDict()
    action_labels["steering"] = steering
    action_labels["acceleration"] = acc
    return action_labels

  def _normalize_value(self, value, range):
    """Normalizes a value to the range [-1, 1].

    If `value` lies outside the given range, the result is clipped
    to [-1, 1].
    """
    normed = 2 * (value - range[0]) / (range[1] - range[0]) - 1
    normed = max(-1, normed)  # values below -1 are clipped
    normed = min(1, normed)   # values above 1 are clipped
    return normed

  def _denormalize_value(self, normed, range):
    """Inverse of the normalization.

    Reconstructs the original value from a given normed value and a range.
    The reconstruction is lossy if the value was clipped during
    normalization. The expected range of the original normalization
    is [-1, 1].

    Args:
      normed: Double or float.
      range: A list containing the lower and upper bound.

    Returns:
      value: Double or float.
    """
    value = 0.5 * (normed + 1) * (range[1] - range[0]) + range[0]
    return value

  def _save_data(self, data):
    """Saves the data in a prespecified directory.

    Args:
      data: The data that should be saved.
    """
    if self._dump_dir is not None:
      # check whether the path exists, otherwise generate the directory
      if not os.path.exists(self._dump_dir):
        os.makedirs(self._dump_dir)
      path = self._dump_dir + '/dataset_' + str(int(
        time.time() * 10000)) + '.pickle'
      with open(path, 'wb') as handle:
        pickle.dump(data, handle, protocol=pickle.HIGHEST_PROTOCOL)
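# A minimal usage sketch for DataGenerator, assuming the imports used by the
# class above (numpy, logging, os, pickle, time, ContinuousHighwayBlueprint,
# GraphObserver) are available; the dump directory path is illustrative.
generator = DataGenerator(num_scenarios=3,
                          dump_dir="/tmp/bark_ml_dataset",
                          render=False)
data_all_scenarios = generator.run_scenarios()

# each entry holds the datapoints of one scenario run; a datapoint pairs a
# graph observation with the "perfect" steering/acceleration labels
first_datum = data_all_scenarios[0][0]
print(first_datum["observation"].shape, first_datum["label"])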