def test_general_evaluator(self):
  params = ParameterServer()
  bp = ContinuousSingleLaneBlueprint(params)
  env = SingleAgentRuntime(blueprint=bp, render=True)
  evaluator = GeneralEvaluator(params)
  env._evaluator = evaluator
  env.reset()
  for _ in range(0, 4):
    # gym convention: (observation, reward, done, info)
    state, reward, terminal, info = env.step(np.array([0., 0.]))
    print(terminal, reward)

def test_behavior_wrapping(self):
  # create scenario
  params = ParameterServer()
  bp = DiscreteHighwayBlueprint(params,
                                number_of_senarios=10,
                                random_seed=0)
  env = SingleAgentRuntime(blueprint=bp, render=False)
  # env = gym.make("highway-v1", params=params)
  ml_behaviors = []
  # ml_behaviors.append(IQNAgent(env=env, test_env=env, params=params))
  ml_behaviors.append(FQFAgent(env=env, params=params))
  # ml_behaviors.append(QRDQNAgent(env=env, test_env=env, params=params))
  for ml_behavior in ml_behaviors:
    # set agent
    env.ml_behavior = ml_behavior
    env.reset()
    action = np.random.randint(low=0, high=env.action_space.n)
    observed_next_state, reward, done, info = env.step(action)
    print(
      f"Observed state: {observed_next_state}, Reward: {reward}, Done: {done}")

    # action is set externally
    ml_behavior._set_action_externally = True
    agent_id = list(env._world.agents.keys())[0]
    observed_world = env._world.Observe([agent_id])[0]

    # perform a random action and plan the trajectory
    action = np.random.randint(low=1, high=env.action_space.n)
    ml_behavior.ActionToBehavior(action)
    a = ml_behavior.Plan(0.2, observed_world)

    # sample a different random action
    another_action = action
    while another_action == action:
      another_action = np.random.randint(low=1, high=env.action_space.n)

    # plan the trajectory for the other action
    ml_behavior.ActionToBehavior(another_action)
    b = ml_behavior.Plan(0.2, observed_world)

    # the trajectories generated by two different actions should differ
    self.assertEqual(np.any(np.not_equal(a, b)), True)

    # the action is calculated within the Plan(..) function
    ml_behavior._set_action_externally = False
    a = ml_behavior.Plan(0.2, observed_world)
    b = ml_behavior.Plan(0.2, observed_world)
    last_action = ml_behavior.GetLastAction()
    self.assertTrue(isinstance(last_action, float))
    # same trajectory for the same state
    np.testing.assert_array_equal(a, b)

def test_envs_discrete_rl(self):
  params = ParameterServer()
  discrete_blueprints = []
  discrete_blueprints.append(DiscreteHighwayBlueprint(params))
  discrete_blueprints.append(DiscreteMergingBlueprint(params))
  for bp in discrete_blueprints:
    env = SingleAgentRuntime(blueprint=bp, render=False)
    env.reset()
    for _ in range(0, 10):
      action = np.random.randint(low=0, high=3)
      observed_next_state, reward, done, info = env.step(action)

def test_envs_cont_rl(self):
  params = ParameterServer()
  cont_blueprints = []
  cont_blueprints.append(ContinuousHighwayBlueprint(params))
  cont_blueprints.append(ContinuousMergingBlueprint(params))
  for bp in cont_blueprints:
    env = SingleAgentRuntime(blueprint=bp, render=False)
    env.reset()
    for _ in range(0, 10):
      action = np.random.uniform(low=-0.1, high=0.1, size=(2, ))
      observed_next_state, reward, done, info = env.step(action)

def test_goal_reached_cpp_evaluator(self):
  params = ParameterServer()
  bp = ContinuousHighwayBlueprint(params)
  env = SingleAgentRuntime(blueprint=bp, render=True)
  env.reset()
  world = env._world
  eval_id = env._scenario._eval_agent_ids[0]
  observed_world = world.Observe([eval_id])[0]
  evaluator = GoalReachedEvaluator(params)
  action = np.array([0., 0.], dtype=np.float32)
  start_time = time.time()
  print(evaluator.Evaluate(observed_world, action))
  end_time = time.time()
  print(f"It took {end_time - start_time} seconds.")

def test_sac_graph_agent(self):
  params = ParameterServer()
  bp = ContinuousMergingBlueprint(params,
                                  number_of_senarios=2500,
                                  random_seed=0)
  observer = GraphObserver(params=params)
  env = SingleAgentRuntime(blueprint=bp, observer=observer, render=False)
  sac_agent = BehaviorGraphSACAgent(environment=env,
                                    observer=observer,
                                    params=params)
  env.ml_behavior = sac_agent
  env.reset()
  eval_id = env._scenario._eval_agent_ids[0]
  self.assertEqual(env._world.agents[eval_id].behavior_model, sac_agent)
  for _ in range(0, 5):
    env._world.Step(0.2)

def test_configurable_blueprint(self):
  params = ParameterServer(
    filename="bark_ml/tests/data/highway_merge_configurable.json")
  # continuous model
  ml_behavior = BehaviorContinuousML(params=params)
  bp = ConfigurableScenarioBlueprint(params=params, ml_behavior=ml_behavior)
  env = SingleAgentRuntime(blueprint=bp, render=False)

  # agent
  sac_agent = BehaviorSACAgent(environment=env, params=params)
  env.ml_behavior = sac_agent

  # test run
  env.reset()
  for _ in range(0, 5):
    action = np.random.randint(low=0, high=3)
    observed_next_state, reward, done, info = env.step(action)

def test_nearest_observer(self):
  params = ParameterServer()
  bp = ContinuousHighwayBlueprint(params)
  env = SingleAgentRuntime(blueprint=bp, render=True)
  env.reset()
  world = env._world

  # under test
  observer = NearestAgentsObserver(params)

  eval_id = env._scenario._eval_agent_ids[0]
  observed_world = world.Observe([eval_id])[0]
  start_time = time.time()
  observed_state = observer.Observe(observed_world)
  end_time = time.time()
  print(f"It took {end_time - start_time} seconds.")
  print(observed_state, observer.observation_space.shape)

def test_tracing_bark_world(self):
  params = ParameterServer()
  bp = ContinuousHighwayBlueprint(params)
  tracer = Tracer()
  env = SingleAgentRuntime(blueprint=bp, render=False)
  sac_agent = BehaviorSACAgent(environment=env, params=params)
  env.ml_behavior = sac_agent

  # NOTE: this also tests whether a BARK agent is self-contained
  env.ml_behavior.set_actions_externally = False
  env.reset()
  bark_world = env._world
  for j in range(0, 2):
    for i in range(0, 5):
      bark_world.Step(0.2)
      eval_dict = bark_world.Evaluate()
      tracer.Trace(eval_dict, num_episode=j)
  self.assertEqual(len(tracer._states), 10)

def test_behavior_wrapping(self):
  # create scenario
  params = ParameterServer()
  bp = ContinuousHighwayBlueprint(params, num_scenarios=10, random_seed=0)
  env = SingleAgentRuntime(blueprint=bp, render=False)
  ml_behaviors = []
  ml_behaviors.append(BehaviorPPOAgent(environment=env, params=params))
  ml_behaviors.append(BehaviorSACAgent(environment=env, params=params))
  for ml_behavior in ml_behaviors:
    # set agent
    env.ml_behavior = ml_behavior
    env.reset()
    done = False
    while not done:
      action = np.random.uniform(low=-0.1, high=0.1, size=(2, ))
      observed_next_state, reward, done, info = env.step(action)
      print(
        f"Observed state: {observed_next_state}, Reward: {reward}, Done: {done}")

    # action is set externally
    ml_behavior._set_action_externally = True
    agent_id = list(env._world.agents.keys())[0]
    observed_world = env._world.Observe([agent_id])[0]

    action = np.random.uniform(low=-0.1, high=0.1, size=(2, ))
    ml_behavior.ActionToBehavior(action)
    a = ml_behavior.Plan(0.2, observed_world)

    action = np.random.uniform(low=-0.1, high=0.1, size=(2, ))
    ml_behavior.ActionToBehavior(action)
    b = ml_behavior.Plan(0.2, observed_world)

    # the trajectories generated by two different actions should differ
    self.assertEqual(np.any(np.not_equal(a, b)), True)

    # the action is calculated within the Plan(..) function
    ml_behavior._set_action_externally = False
    a = ml_behavior.Plan(0.2, observed_world)
    b = ml_behavior.Plan(0.2, observed_world)
    # same trajectory for the same state
    np.testing.assert_array_equal(a, b)

def test_tracer(self):
  params = ParameterServer()
  bp = ContinuousHighwayBlueprint(params)
  tracer = Tracer()
  env = SingleAgentRuntime(blueprint=bp, render=False)
  for i in range(0, 2):
    env.reset()
    for _ in range(0, 10):
      action = np.random.uniform(low=-0.1, high=0.1, size=(2, ))
      data = env.step(action)  # (observed_next_state, reward, done, info)
      tracer.Trace(data, num_episode=i)

  # NOTE: test basic tracing
  self.assertEqual(len(tracer._states), 20)
  for i in range(0, 20):
    self.assertEqual("is_terminal" in tracer._states[i].keys(), True)
    self.assertEqual("reward" in tracer._states[i].keys(), True)
    self.assertEqual("collision" in tracer._states[i].keys(), True)
    self.assertEqual("drivable_area" in tracer._states[i].keys(), True)
    self.assertEqual("goal_reached" in tracer._states[i].keys(), True)
    self.assertEqual("step_count" in tracer._states[i].keys(), True)

  # NOTE: test pandas magic
  tracer.ConvertToDf()
  # average collisions
  print(
    tracer.Query(key="collision", group_by="num_episode",
                 agg_type="MEAN").mean())
  # average reward
  print(
    tracer.Query(key="reward", group_by="num_episode",
                 agg_type="SUM").mean())

  # NOTE: test reset
  tracer.Reset()
  self.assertEqual(len(tracer._states), 0)
  self.assertEqual(tracer._df, None)

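# A hedged follow-up sketch (not part of the test suite): the same Query
# interface exercised above can aggregate any traced key, e.g. an approximate
# per-episode goal-reached rate ("goal_reached" is asserted as a traced key
# in test_tracer; the variable name below is illustrative):
#
#   goal_rate = tracer.Query(
#     key="goal_reached", group_by="num_episode", agg_type="MEAN").mean()
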
def test_behavior_wrapping(self):
  # create scenario
  params = ParameterServer()
  bp = ContinuousHighwayBlueprint(params,
                                  number_of_senarios=10,
                                  random_seed=0)
  env = SingleAgentRuntime(blueprint=bp, render=True)
  ml_behaviors = []
  ml_behaviors.append(BehaviorPPOAgent(environment=env, params=params))
  ml_behaviors.append(BehaviorSACAgent(environment=env, params=params))
  for ml_behavior in ml_behaviors:
    # set agent
    env.ml_behavior = ml_behavior
    env.reset()
    done = False
    while not done:
      action = np.random.uniform(low=-0.1, high=0.1, size=(2, ))
      observed_next_state, reward, done, info = env.step(action)
      print(
        f"Observed state: {observed_next_state}, Reward: {reward}, Done: {done}")

# NOTE: the following setup is assumed from the surrounding example script;
# the fragment below uses `params` and a continuous blueprint `bp`.
params = ParameterServer()
bp = ContinuousHighwayBlueprint(params)
# bp = DiscreteHighwayBlueprint(params,
#                               number_of_senarios=10,
#                               random_seed=0)

# arguments that are additionally set in the runtime
# overwrite the ones of the blueprint,
# e.g. we can change the observer to the C++ observer
observer = NearestObserver(params)

# viewer = MPViewer(params=params,
#                   x_range=[-35, 35],
#                   y_range=[-35, 35],
#                   follow_agent_id=True)
# viewer = VideoRenderer(renderer=viewer,
#                        world_step_time=0.2,
#                        fig_path="/Users/hart/2020/bark-ml/video/")

env = SingleAgentRuntime(blueprint=bp, observer=observer, render=True)

# gym interface
env.reset()
done = False
while not done:
  action = np.random.uniform(
    low=np.array([-0.5, -0.02]), high=np.array([0.5, 0.02]), size=(2, ))
  observed_next_state, reward, done, info = env.step(action)
  print(f"Observed state: {observed_next_state}, "
        f"Action: {action}, Reward: {reward}, Done: {done}")

# viewer.export_video(
#   filename="/Users/hart/2020/bark-ml/video/video", remove_image_dir=False)

class PyGraphObserverTests(unittest.TestCase):
  """Observer tests"""

  def _get_observation(self, observer, world, eval_id):
    observed_world = world.Observe([eval_id])[0]
    observation = observer.Observe(observed_world)
    return observation, observed_world

  def setUp(self):
    """Setting up the test-case."""
    params = ParameterServer()
    bp = ContinuousHighwayBlueprint(params, random_seed=0)
    self.env = SingleAgentRuntime(blueprint=bp, render=False)
    self.env.reset()
    self.world = self.env._world
    self.observer = GraphObserver(params)
    self.eval_id = self.env._scenario._eval_agent_ids[0]

  def test_parameter_server_usage(self):
    expected_num_agents = 15
    expected_visibility_radius = 100
    params = ParameterServer()
    params["ML"]["GraphObserver"]["AgentLimit"] = expected_num_agents
    params["ML"]["GraphObserver"]["VisibilityRadius"] = expected_visibility_radius
    params["ML"]["GraphObserver"]["NormalizationEnabled"] = True
    observer = GraphObserver(params=params)
    self.assertEqual(observer._num_agents, expected_num_agents)
    self.assertEqual(observer._visibility_radius, expected_visibility_radius)
    # self.assertTrue(observer._add_self_loops)
    self.assertTrue(observer._normalize_observations)

  def test_request_subset_of_available_node_features(self):
    params = ParameterServer()
    requested_features = GraphObserver.available_node_attributes()[0:5]
    params["ML"]["GraphObserver"]["EnabledNodeFeatures"] = requested_features
    observer = GraphObserver(params=params)
    self.assertEqual(observer._enabled_node_attribute_keys, requested_features)

  def test_request_subset_of_available_edge_features(self):
    params = ParameterServer()
    requested_features = GraphObserver.available_edge_attributes()[0:2]
    params["ML"]["GraphObserver"]["EnabledEdgeFeatures"] = requested_features
    observer = GraphObserver(params=params)
    self.assertEqual(observer._enabled_edge_attribute_keys, requested_features)

  def test_request_partially_invalid_node_features(self):
    params = ParameterServer()
    requested_features = \
      GraphObserver.available_node_attributes()[0:5] + ['invalid']
    params["ML"]["GraphObserver"]["EnabledNodeFeatures"] = requested_features
    observer = GraphObserver(params=params)
    # remove the invalid feature from the expected list
    requested_features.pop(-1)
    self.assertEqual(observer._enabled_node_attribute_keys, requested_features)

  def test_request_partially_invalid_edge_features(self):
    params = ParameterServer()
    requested_features = \
      GraphObserver.available_edge_attributes()[0:2] + ['invalid']
    params["ML"]["GraphObserver"]["EnabledEdgeFeatures"] = requested_features
    observer = GraphObserver(params=params)
    # remove the invalid feature from the expected list
    requested_features.pop(-1)
    self.assertEqual(observer._enabled_edge_attribute_keys, requested_features)

  def test_observe_with_self_loops(self):
    num_agents = 4
    params = ParameterServer()
    params["ML"]["GraphObserver"]["AgentLimit"] = num_agents
    params["ML"]["GraphObserver"]["SelfLoops"] = True
    observer = GraphObserver(params=params)
    obs, _ = self._get_observation(observer, self.world, self.eval_id)
    obs = tf.expand_dims(obs, 0)  # add a batch dimension
    _, adjacency, _ = GraphObserver.graph(
      obs, graph_dims=observer.graph_dimensions)
    adjacency_list_diagonal = tf.linalg.tensor_diag_part(adjacency[0])
    # assert ones on the diagonal of the adjacency matrix
    tf.assert_equal(adjacency_list_diagonal, tf.ones(num_agents))

  def test_observe_without_self_loops(self):
    num_agents = 4
    params = ParameterServer()
    params["ML"]["GraphObserver"]["AgentLimit"] = num_agents
    params["ML"]["GraphObserver"]["SelfLoops"] = False
    observer = GraphObserver(params=params)
    obs, _ = self._get_observation(observer, self.world, self.eval_id)
    obs = tf.expand_dims(obs, 0)  # add a batch dimension
    _, adjacency, _ = GraphObserver.graph(
      obs, graph_dims=observer.graph_dimensions)
    adjacency_list_diagonal = tf.linalg.tensor_diag_part(adjacency[0])
    # assert zeros on the diagonal of the adjacency matrix
    tf.assert_equal(adjacency_list_diagonal, tf.zeros(num_agents))

  def test_observation_conforms_to_spec(self):
    """
    Verify that the observation returned by the observer
    is valid with respect to its defined observation space.
    """
    num_agents = 4
    params = ParameterServer()
    params["ML"]["GraphObserver"]["AgentLimit"] = num_agents
    observer = GraphObserver(params=params)
    obs, _ = self._get_observation(observer, self.world, self.eval_id)
    self.assertTrue(observer.observation_space.contains(obs))

    # additionally check that the adjacency list is binary, since
    # this currently can't be enforced by the observation space
    adj_start_idx = num_agents * observer.feature_len
    adj_end_idx = adj_start_idx + num_agents ** 2
    adj_list = obs[adj_start_idx:adj_end_idx]
    for element in adj_list:
      self.assertIn(element, [0, 1])

  def test_observed_agents_selection(self):
    agent_limit = 10
    params = ParameterServer()
    params["ML"]["GraphObserver"]["AgentLimit"] = agent_limit
    observer = GraphObserver(params=params)
    obs, obs_world = self._get_observation(
      observer=observer, world=self.world, eval_id=self.eval_id)
    obs = tf.expand_dims(obs, 0)  # add a batch dimension
    nodes, _, _ = GraphObserver.graph(
      obs, graph_dims=observer.graph_dimensions)
    nodes = nodes[0]  # remove the batch dimension
    ego_node = nodes[0]
    ego_node_pos = Point2d(
      ego_node[0].numpy(),  # x coordinate
      ego_node[1].numpy())  # y coordinate

    # verify that the nodes are ordered by
    # ascending distance to the ego node
    max_distance_to_ego = 0
    for node in nodes:
      pos = Point2d(
        node[0].numpy(),  # x coordinate
        node[1].numpy())  # y coordinate
      distance_to_ego = Distance(pos, ego_node_pos)
      self.assertGreaterEqual(
        distance_to_ego, max_distance_to_ego,
        msg='Nodes are not sorted by distance relative to '
            'the ego node in ascending order.')
      max_distance_to_ego = distance_to_ego

  def test_observation_to_graph_conversion(self):
    params = ParameterServer()
    params["ML"]["GraphObserver"]["SelfLoops"] = False
    graph_observer = GraphObserver(params=params)

    num_nodes = 5
    num_features = 5
    num_edge_features = 4
    node_features = np.random.random_sample((num_nodes, num_features))
    edge_features = np.random.random_sample(
      (num_nodes, num_nodes, num_edge_features))

    # note that edges are bidirectional,
    # i.e. the matrix is symmetric
    adjacency_list = [
      [0, 1, 1, 1, 0],  # node 0 connects with nodes 1, 2, 3
      [1, 0, 1, 1, 0],  # node 1 connects with nodes 0, 2, 3
      [1, 1, 0, 1, 0],  # node 2 connects with nodes 0, 1, 3
      [1, 1, 1, 0, 0],  # node 3 connects with nodes 0, 1, 2
      [0, 0, 0, 0, 0]   # empty slot -> all zeros
    ]

    observation = np.array(node_features)
    observation = np.append(observation, adjacency_list)
    observation = np.append(observation, edge_features)
    observation = observation.reshape(-1)
    observations = np.array([observation, observation])

    # 25 node features + 25 adjacency entries + 100 edge features = 150
    self.assertEqual(observations.shape, (2, 150))

    expected_nodes = tf.constant([node_features, node_features])
    expected_edge_features = tf.constant([edge_features, edge_features])
    graph_dims = (num_nodes, num_features, num_edge_features)
    nodes, edges, edge_features = graph_observer.graph(
      observations, graph_dims)
    self.assertTrue(tf.reduce_all(tf.equal(nodes, expected_nodes)))
    self.assertTrue(
      tf.reduce_all(tf.equal(edge_features, expected_edge_features)))

    observations = np.array([observation, observation, observation])

    # in dense mode, the nodes of all graphs are in a single list
    expected_nodes = tf.constant(
      [node_features, node_features, node_features])
    expected_nodes = tf.reshape(expected_nodes, [-1, num_features])

    # the edges encoded in the adjacency list above
    expected_dense_edges = tf.constant([
      # graph 1
      [0, 1], [0, 2], [0, 3],
      [1, 0], [1, 2], [1, 3],
      [2, 0], [2, 1], [2, 3],
      [3, 0], [3, 1], [3, 2],
      # graph 2
      [5, 6], [5, 7], [5, 8],
      [6, 5], [6, 7], [6, 8],
      [7, 5], [7, 6], [7, 8],
      [8, 5], [8, 6], [8, 7],
      # graph 3
      [10, 11], [10, 12], [10, 13],
      [11, 10], [11, 12], [11, 13],
      [12, 10], [12, 11], [12, 13],
      [13, 10], [13, 11], [13, 12]
    ], dtype=tf.int32)

    expected_node_to_graph_map = tf.constant([
      0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2
    ])

    observations = tf.convert_to_tensor(observations)
    nodes, edges, node_to_graph_map, E = \
      GraphObserver.graph(observations, graph_dims, dense=True)
    self.assertTrue(tf.reduce_all(tf.equal(nodes, expected_nodes)))
    self.assertTrue(tf.reduce_all(tf.equal(edges, expected_dense_edges)))
    # self.assertTrue(tf.reduce_all(
    #   tf.equal(node_to_graph_map, expected_node_to_graph_map)))

  def test_agent_pruning(self):
    """
    Verify that the observer correctly handles the case where
    there are fewer agents in the world than set as the limit.

    tl;dr: check that all entries of the node features, adjacency
    matrix, and edge features not corresponding to actually
    existing agents are zeros.
    """
    num_agents = 25
    params = ParameterServer()
    params["ML"]["GraphObserver"]["AgentLimit"] = num_agents
    observer = GraphObserver(params=params)
    obs, world = self._get_observation(observer, self.world, self.eval_id)
    obs = tf.expand_dims(obs, 0)  # add a batch dimension
    nodes, adjacency_matrix, edge_features = GraphObserver.graph(
      observations=obs, graph_dims=observer.graph_dimensions)

    self.assertEqual(nodes.shape, [1, num_agents, observer.feature_len])

    expected_num_agents = len(world.agents)
    # nodes that do not represent agents, but are contained
    # to fill up the required observation space
    expected_n_fill_up_nodes = num_agents - expected_num_agents
    fill_up_nodes = nodes[0, expected_num_agents:]
    self.assertEqual(
      fill_up_nodes.shape,
      [expected_n_fill_up_nodes, observer.feature_len])

    # verify that entries for non-existing agents are all zeros
    self.assertEqual(tf.reduce_sum(fill_up_nodes), 0)

    # the equivalent for edges: verify that for each zero entry
    # in the adjacency matrix, the corresponding edge feature
    # vector is a zero vector of the correct length
    zero_indices = tf.where(tf.equal(adjacency_matrix, 0))
    fill_up_edge_features = tf.gather_nd(edge_features, zero_indices)
    edge_feature_len = observer.graph_dimensions[2]
    zero_edge_feature_vectors = tf.zeros(
      [zero_indices.shape[0], edge_feature_len])
    self.assertTrue(tf.reduce_all(tf.equal(
      fill_up_edge_features, zero_edge_feature_vectors)))

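# The conversion tests above rely on the flat layout that GraphObserver uses
# for its observations: node features first, then the flattened adjacency
# matrix, then the flattened edge features. The helper below is a hypothetical
# sketch of that layout (it is not part of bark-ml): it splits a single flat
# observation back into its three blocks. For num_nodes=5, num_features=5,
# and num_edge_features=4 this yields 25 + 25 + 100 = 150 entries, matching
# the (2, 150) shape asserted in test_observation_to_graph_conversion.
def split_flat_observation(obs, num_nodes, feature_len, edge_feature_len):
  """Splits a flat GraphObserver-style observation into its three blocks."""
  n_node_values = num_nodes * feature_len
  n_adj_values = num_nodes ** 2
  nodes = obs[:n_node_values].reshape(num_nodes, feature_len)
  adjacency = obs[n_node_values:n_node_values + n_adj_values].reshape(
    num_nodes, num_nodes)
  edge_features = obs[n_node_values + n_adj_values:].reshape(
    num_nodes, num_nodes, edge_feature_len)
  return nodes, adjacency, edge_features
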
class DataGenerator():
  """DataGenerator generates a dataset for a supervised learning setting.

  Args:
    num_scenarios: An integer specifying the number of scenarios
      to generate data for.
    dump_dir: A path specifying the location where to save the data
      [default: None -> data will not be saved].
    render: A boolean value indicating whether the scenario runs
      should be rendered while generating the data (rendering slows
      down the data generation considerably).
    params: A `ParameterServer` instance that enables further
      configuration of the observer. Defaults to a new instance.
  """

  def __init__(self,
               num_scenarios=3,
               dump_dir=None,
               render=False,
               params=ParameterServer()):
    """Inits DataGenerator with the parameters (see class definition)."""
    self._dump_dir = dump_dir
    self._num_scenarios = num_scenarios
    self._params = params
    self._bp = ContinuousHighwayBlueprint(
      self._params,
      number_of_senarios=self._num_scenarios,
      random_seed=0)
    self._observer = GraphObserver(params=self._params)
    self._env = SingleAgentRuntime(
      blueprint=self._bp, observer=self._observer, render=render)

  def run_scenarios(self):
    """Runs all scenarios sequentially.

    Generates a dataset by running one scenario at a time.

    Returns:
      data_all_scenarios: list of scenario_data entries (with
        scenario_data being a list of datapoints; see the definition
        of a datapoint in _run_scenario).
    """
    data_all_scenarios = list()
    for _ in range(0, self._num_scenarios):
      # generate a scenario
      scenario, idx = self._bp._scenario_generation.get_next_scenario()

      # log the progress of the scenario generation in 20% steps
      part = max(1, int(self._num_scenarios / 5))
      if idx % part == 0:
        msg = "Running data generation on scenario " + str(idx + 1) + \
          "/" + str(self._num_scenarios)
        logging.info(msg)

      data_scenario = self._run_scenario(scenario)
      # save the data of this run in a separate file and append it to the dataset
      self._save_data(data_scenario)
      data_all_scenarios.append(data_scenario)
    return data_all_scenarios

  def _run_scenario(self, scenario):
    """Runs a specific scenario for a predefined number of steps.

    Args:
      scenario: bark scenario

    Returns:
      scenario_data: list containing all datapoints of the scenario run
        (one datapoint is a dict with entries "observation" and "label";
        the observation is a tensor, the label is an OrderedDict with
        entries "steering" and "acceleration" for the ego node).
    """
    scenario_data = list()
    # set the boundaries for the random actions
    low = np.array([-0.5, -0.02])
    high = np.array([0.5, 0.02])

    self._env.reset(scenario=scenario)
    done = False
    while not done:
      action = np.random.uniform(low=low, high=high, size=(2, ))
      observation, reward, done, info = self._env.step(action)

      # calculate the labels for the supervised setting
      action_labels = self._calc_labels(observation)

      # save the datum in scenario_data
      datum = dict()
      datum["observation"] = observation
      datum["label"] = action_labels
      scenario_data.append(datum)
    return scenario_data

  def _calc_labels(self, observation):
    """Calculates the perfect action labels for one observation.

    Args:
      observation: An observation coming from the GraphObserver.

    Returns:
      action_labels: OrderedDict containing the perfect action values
        (steering, acceleration) for the ego agent.

    Raises:
      KeyError: A needed node attribute is not present in the
        observation.
    """
    # get the relevant metadata from the observer
    attribute_keys = self._observer._enabled_node_attribute_keys
    norm_data = self._observer.normalization_data
    normalize = self._observer._normalize_observations
    feature_len = self._observer.feature_len

    # extract the features of the ego node
    node_features = observation[:feature_len]

    # extract the features relevant for the further calculation
    goal_theta = node_features[attribute_keys.index("goal_theta")].numpy()
    theta = node_features[attribute_keys.index("theta")].numpy()
    vel = node_features[attribute_keys.index("vel")].numpy()
    goal_vel = node_features[attribute_keys.index("goal_vel")].numpy()
    goal_d = node_features[attribute_keys.index("goal_d")].numpy()

    # denormalize the features if they were normalized in the first place
    if normalize:
      goal_theta = self._denormalize_value(goal_theta, norm_data["theta"])
      theta = self._denormalize_value(theta, norm_data["theta"])
      vel = self._denormalize_value(vel, norm_data["vel"])
      goal_vel = self._denormalize_value(goal_vel, norm_data["vel"])
      goal_d = self._denormalize_value(goal_d, norm_data["distance"])

    # calculate the "perfect" steering
    steering = goal_theta - theta

    # calculate the "perfect" acceleration
    d_vel = goal_vel - vel
    # equations derived from (constant acceleration):
    #   goal_d = 0.5 * acc * t^2 + vel * t
    #   d_vel = acc * t
    acc = (1. / goal_d) * d_vel * (d_vel / 2 + vel)

    # normalize the labels if the node features were normalized initially
    if normalize:
      # manually defined ranges for steering and acc
      range_steering = [-0.1, 0.1]
      range_acc = [-0.6, 0.6]
      steering = self._normalize_value(steering, range_steering)
      acc = self._normalize_value(acc, range_acc)

    action_labels = OrderedDict()
    action_labels["steering"] = steering
    action_labels["acceleration"] = acc
    return action_labels

  def _normalize_value(self, value, range):
    """Normalizes a value to the range [-1, 1].

    If `value` lies outside the given `range`, the normalized value
    is clipped to [-1, 1].
    """
    normed = 2 * (value - range[0]) / (range[1] - range[0]) - 1
    normed = max(-1, normed)  # clip values lower than -1
    normed = min(1, normed)   # clip values greater than 1
    return normed

  def _denormalize_value(self, normed, range):
    """Inverse of the normalization.

    Reconstructs the original value from a given normed value and a
    range. The reconstruction is lossy if the value was clipped during
    normalization. The expected range of the original normalization
    is [-1, 1].

    Args:
      normed: Double or float.
      range: List containing the lower and upper bound.

    Returns:
      value: Double or float.
    """
    value = 0.5 * (normed + 1) * (range[1] - range[0]) + range[0]
    return value

  def _save_data(self, data):
    """Saves the data in the prespecified directory.

    Args:
      data: the data that needs to be saved.
    """
    if self._dump_dir is not None:
      # check whether the path exists, otherwise create the directory
      if not os.path.exists(self._dump_dir):
        os.makedirs(self._dump_dir)
      path = self._dump_dir + '/dataset_' + \
        str(int(time.time() * 10000)) + '.pickle'
      with open(path, 'wb') as handle:
        pickle.dump(data, handle, protocol=pickle.HIGHEST_PROTOCOL)

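# Worked example for the acceleration label in _calc_labels above. With
# constant acceleration, goal_d = 0.5*acc*t^2 + vel*t and d_vel = acc*t;
# substituting t = d_vel/acc gives
#   goal_d = (d_vel / acc) * (d_vel / 2 + vel)
# and therefore acc = d_vel * (d_vel / 2 + vel) / goal_d, which is the
# formula used in _calc_labels. For example, vel = 10 m/s, goal_vel = 12 m/s,
# and goal_d = 22 m yields acc = 2 * (1 + 10) / 22 = 1.0 m/s^2.
#
# Minimal usage sketch, hedged (the dump directory is illustrative, not from
# the source; running it requires the full bark-ml stack):
#
#   generator = DataGenerator(num_scenarios=3,
#                             dump_dir="/tmp/graph_dataset",
#                             render=False)
#   dataset = generator.run_scenarios()
#   # dataset[i][j] is {"observation": ..., "label": OrderedDict(...)}
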
def reset(self, scenario=None):
  """Resets the runtime and its objects."""
  self._count = 0
  return SingleAgentRuntime.reset(self, scenario=scenario)