def observe(self) -> np.ndarray:
    if not self.env.road:
        return np.zeros(self.space().shape)

    if self.absolute:
        raise NotImplementedError()
    else:
        # Add nearby traffic
        self.grid.fill(0)
        df = pd.DataFrame.from_records(
            [v.to_dict(self.observer_vehicle) for v in self.env.road.vehicles])
        # Normalize
        df = self.normalize(df)
        # Fill-in features
        for layer, feature in enumerate(self.features):
            for _, vehicle in df.iterrows():
                x, y = vehicle["x"], vehicle["y"]
                # Recover unnormalized coordinates for cell index
                if "x" in self.features_range:
                    x = utils.lmap(x, [-1, 1], [self.features_range["x"][0],
                                                self.features_range["x"][1]])
                if "y" in self.features_range:
                    y = utils.lmap(y, [-1, 1], [self.features_range["y"][0],
                                                self.features_range["y"][1]])
                cell = (int((x - self.grid_size[0, 0]) / self.grid_step[0]),
                        int((y - self.grid_size[1, 0]) / self.grid_step[1]))
                if 0 <= cell[1] < self.grid.shape[-2] and 0 <= cell[0] < self.grid.shape[-1]:
                    self.grid[layer, cell[1], cell[0]] = vehicle[feature]
        # Clip
        obs = np.clip(self.grid, -1, 1)
        return obs

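# Note: every snippet in this section leans on `utils.lmap` / `lmap` to rescale values
# between intervals. For reference, a minimal sketch of such a linear-map helper,
# matching the lmap(v, x, y) call signature used throughout (the actual highway-env
# utils implementation may differ in details):
def lmap(v, x, y):
    """Linearly map value v from interval x = [x0, x1] to interval y = [y0, y1]."""
    return y[0] + (v - x[0]) * (y[1] - y[0]) / (x[1] - x[0])
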
def _reward(self, action: Action) -> float:
    """
    The reward is defined to foster driving at high speed, on the rightmost lanes,
    and to avoid collisions.

    :param action: the last action performed
    :return: the corresponding reward
    """
    neighbours = self.road.network.all_side_lanes(self.vehicle.lane_index)
    lane = self.vehicle.target_lane_index[2] if isinstance(self.vehicle, ControlledVehicle) \
        else self.vehicle.lane_index[2]
    scaled_speed = utils.lmap(self.vehicle.speed,
                              self.config["reward_speed_range"], [0, 1])
    lane_change = action == 0 or action == 2
    reward = \
        + self.config["collision_reward"] * self.vehicle.crashed \
        + self.config["right_lane_reward"] * lane / max(len(neighbours) - 1, 1) \
        + self.config["high_speed_reward"] * np.clip(scaled_speed, 0, 1) \
        + self.config["lane_change_reward"] * lane_change
    reward = utils.lmap(
        reward,
        [
            self.config["collision_reward"],  # + self.config["lane_change_reward"]
            self.config["high_speed_reward"] + self.config["right_lane_reward"]
        ],
        [0, 1])
    reward = 0 if not self.vehicle.on_road else reward
    return reward

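# A worked example of the final lmap normalization above, with hypothetical weights
# (illustrative values, not taken from any config in this file). The bounds passed to
# lmap are the worst and best achievable raw rewards, so the worst case maps to 0 and
# the best case to 1; the lane_change term is left out of the bounds, mirroring the
# commented-out term above, so lane changes can push the result slightly outside [0, 1].
collision_reward = -1.0  # hypothetical
right_lane_reward = 0.1  # hypothetical
high_speed_reward = 0.4  # hypothetical
worst = collision_reward                      # crashed, leftmost lane, zero speed
best = high_speed_reward + right_lane_reward  # no crash, rightmost lane, top speed
print(lmap(worst, [worst, best], [0, 1]))  # 0.0
print(lmap(best, [worst, best], [0, 1]))   # 1.0
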
def _agent_reward_type_1(self, vehicle, action):
    """
    The reward is defined to foster driving at high speed, on the rightmost lanes,
    and to avoid collisions.

    :param action: the last action performed
    :return: the corresponding reward
    """
    neighbours = self.env.road.network.all_side_lanes(vehicle.lane_index)
    lane = vehicle.target_lane_index[2] if isinstance(vehicle, ControlledVehicle) \
        else vehicle.lane_index[2]
    scaled_speed = utils.lmap(vehicle.speed, self.reward_speed_range, [0, 1])
    # TODO: this must be read from action.py, to determine if it is a lane change or not,
    # for now it's hard-coded to 0 and 2
    lane_change = (action == 0 or action == 2)
    reward = \
        + self.collision_reward * vehicle.crashed \
        + self.on_desired_lane_reward * lane / max(len(neighbours) - 1, 1) \
        + self.high_speed_reward * np.clip(scaled_speed, 0, 1)
    # + self.lane_change_reward * lane_change
    if self.normalize_reward:
        reward = utils.lmap(reward, [
            self.collision_reward + self.lane_change_reward,
            self.high_speed_reward + self.on_desired_lane_reward
        ], [0, 1])
    reward = 0 if not vehicle.on_road else reward
    return reward

def act(self, action: np.ndarray) -> None:
    # if self.clip:
    #     action = np.clip(action, -1, 1)
    if self.longitudinal and self.lateral:
        self.controlled_vehicle.act({
            "acceleration": action[0],  # utils.lmap(action[0], [-1, 1], self.acceleration_range)
            "steering": action[1],  # utils.lmap(action[1], [-1, 1], self.steering_range)
        })
    elif self.longitudinal:
        self.controlled_vehicle.act({
            "acceleration": utils.lmap(action[0], [-1, 1], self.acceleration_range),
            "steering": 0,
        })
    elif self.lateral:
        self.controlled_vehicle.act({
            "acceleration": 0,
            "steering": utils.lmap(action[0], [-1, 1], self.steering_range)
        })
    self.last_action = action

def _agent_reward(self, action: int, vehicle: Vehicle) -> float:
    scaled_speed = utils.lmap(vehicle.speed,
                              self.config["reward_speed_range"], [0, 1])
    reward = self.config["collision_reward"] * vehicle.crashed \
        + self.config["high_speed_reward"] * np.clip(scaled_speed, 0, 1)
    reward = self.config["arrived_reward"] if self.has_arrived(vehicle) else reward
    if self.config["normalize_reward"]:
        reward = utils.lmap(reward,
                            [self.config["collision_reward"], self.config["arrived_reward"]],
                            [0, 1])
    return reward

def _agent_reward_highway_coop(self, vehicle, action):
    """
    The reward is defined to foster driving at high speed, on the rightmost lanes,
    and to avoid collisions.

    :param action: the last action performed
    :return: the corresponding reward
    """
    neighbours = self.env.road.network.all_side_lanes(vehicle.lane_index)
    lane = vehicle.target_lane_index[2] if isinstance(vehicle, ControlledVehicle) \
        else vehicle.lane_index[2]
    reward_speed_range = [vehicle.SPEED_MIN, vehicle.SPEED_MAX]
    scaled_speed = utils.lmap(vehicle.speed, reward_speed_range, [0, 1])
    # TODO: this must be read from action.py, to determine if it is a lane change or not,
    # for now it's hard-coded to 0 and 2
    lane_change = (action == 0 or action == 2)

    collision_reward_value = self.collision_reward * vehicle.crashed
    on_desired_lane_reward_value = self.on_desired_lane_reward * lane / max(
        len(neighbours) - 1, 1)
    high_speed_reward_value = self.high_speed_reward * np.clip(scaled_speed, 0, 1)
    lane_change_reward_value = self.lane_change_reward * lane_change
    # cooperative_reward_value_avg_speeds = self.cooperative_reward_value * \
    #     np.clip(self.avg_speeds, 0, 1)
    cooperative_reward_value_avg_speeds = 0

    reward = \
        collision_reward_value \
        + on_desired_lane_reward_value \
        + high_speed_reward_value \
        + lane_change_reward_value \
        + cooperative_reward_value_avg_speeds
    if self.normalize_reward:
        reward = utils.lmap(reward, [
            self.collision_reward + self.lane_change_reward,
            self.high_speed_reward + self.on_desired_lane_reward
        ], [0, 1])
    reward = 0 if not vehicle.on_road else reward

    reward_info = {
        "reward_crashed": collision_reward_value,
        "reward_on_lane": on_desired_lane_reward_value,
        "reward_scaled_speed": high_speed_reward_value,
        "reward_lane_change": lane_change_reward_value,
        "reward_distance": 0,
        "reward_avg_speeds": cooperative_reward_value_avg_speeds,
        "reward_has_merged": 0,
        "reward_distance_merge_vehicle": 0,
        # "normalized_timestep_reward": reward
        # "vehicle_id": vehicle.id
    }
    return reward, reward_info

def observe(self) -> np.ndarray:
    if not self.env.road:
        return np.zeros(self.space().shape)

    if self.absolute:
        raise NotImplementedError()
    else:
        # Initialize empty data
        self.grid.fill(np.nan)
        # Get nearby traffic data
        df = pd.DataFrame.from_records(
            [v.to_dict(self.observer_vehicle) for v in self.env.road.vehicles])
        # Normalize
        df = self.normalize(df)
        # Fill-in features
        for layer, feature in enumerate(self.features):
            if feature in df.columns:  # A vehicle feature
                for _, vehicle in df.iterrows():
                    x, y = vehicle["x"], vehicle["y"]
                    # Recover unnormalized coordinates for cell index
                    if "x" in self.features_range:
                        x = utils.lmap(x, [-1, 1], [self.features_range["x"][0],
                                                    self.features_range["x"][1]])
                    if "y" in self.features_range:
                        y = utils.lmap(y, [-1, 1], [self.features_range["y"][0],
                                                    self.features_range["y"][1]])
                    cell = self.pos_to_index((x, y), relative=not self.absolute)
                    if 0 <= cell[1] < self.grid.shape[-2] and 0 <= cell[0] < self.grid.shape[-1]:
                        self.grid[layer, cell[1], cell[0]] = vehicle[feature]
            elif feature == "on_road":
                self.fill_road_layer_by_lanes(layer)

    obs = self.grid
    if self.clip:
        obs = np.clip(obs, -1, 1)
    if self.as_image:
        obs = ((np.clip(obs, -1, 1) + 1) / 2 * 255).astype(np.uint8)
    obs = np.nan_to_num(obs).astype(self.space().dtype)
    return obs

def v_lmap(s, road_r, max_v):
    s = s.reshape(4, -1)
    x = s[0, :]
    y = s[1, :]
    theta = s[2, :]
    v = s[3, :]
    for i in range(50):
        x[i] = lmap(x[i], [-1, 1], [-(road_r + 4), (road_r + 4)])
        y[i] = lmap(y[i], [-1, 1], [0, 2 * (road_r + 4)])
        theta[i] = lmap(theta[i], [-1, 1], [-2 * np.pi, 2 * np.pi])
        v[i] = lmap(v[i], [-1, 1], [-max_v, max_v])
    return x, y, theta, v

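# Quick usage sketch for v_lmap, assuming a flattened trajectory of 50 states in
# (x, y, theta, v) layout, matching the reshape above; road_r and max_v below are
# illustrative values, not taken from any config in this file.
import numpy as np

s = np.random.uniform(-1, 1, size=4 * 50)  # normalized trajectory
x, y, theta, v = v_lmap(s, road_r=50.0, max_v=10.0)
print(x.shape)           # (50,)
print(v.min(), v.max())  # now in physical units, within [-10, 10]
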
def _agent_reward_type_2(self, vehicle, action):
    """
    The reward is defined to foster driving at high speed, on a given target lane,
    and to avoid collisions.

    :param action: the last action performed
    :return: the corresponding reward
    """
    lane = vehicle.target_lane_index[2] if isinstance(vehicle, ControlledVehicle) \
        else vehicle.lane_index[2]
    scaled_speed = utils.lmap(vehicle.speed, self.reward_speed_range, [0, 1])
    # TODO: this must be read from action.py, to determine if it is a lane change or not,
    # for now it's hard-coded to 0 and 2
    lane_change = (action == 0 or action == 2)
    on_lane = (lane == self.target_lane)

    distances = []
    distance = 0
    for v in self.env.controlled_vehicles:
        if v is not vehicle:
            longitudinal_distance = vehicle.front_distance_to(v)
            # distance = np.linalg.norm(v.position - vehicle.position)
            distances.append(longitudinal_distance)
    if self.distance_reward_type == "min":
        distance_to_neighbor_car = min(np.abs(distances))
        distance = utils.lmap(distance_to_neighbor_car,
                              [Vehicle.LENGTH, self.env.PERCEPTION_DISTANCE], [0, 1])
    elif self.distance_reward_type == "avg":
        average_distance = np.average(np.abs(distances))
        distance = utils.lmap(average_distance,
                              [Vehicle.LENGTH, self.env.PERCEPTION_DISTANCE], [0, 1])

    reward = \
        + self.collision_reward * vehicle.crashed \
        + self.on_desired_lane_reward * on_lane \
        + self.high_speed_reward * np.clip(scaled_speed, 0, 1) \
        + self.lane_change_reward * lane_change \
        + self.distance_reward * distance
    if self.normalize_reward:
        reward = utils.lmap(reward, [
            self.collision_reward + self.lane_change_reward,
            self.high_speed_reward + self.on_desired_lane_reward
        ], [0, 1])
    reward = 0 if not vehicle.on_road else reward
    return reward

def normalize_obs(self, df: pd.DataFrame) -> pd.DataFrame:
    """
    Normalize the observation values.

    For now, assume that the road is straight along the x axis.

    :param Dataframe df: observation data
    """
    if not self.features_range:
        side_lanes = self.env.road.network.all_side_lanes(
            self.observer_vehicle.lane_index)
        self.features_range = {
            "x": [-5.0 * MDPVehicle.SPEED_MAX, 5.0 * MDPVehicle.SPEED_MAX],
            "y": [-AbstractLane.DEFAULT_WIDTH * len(side_lanes),
                  AbstractLane.DEFAULT_WIDTH * len(side_lanes)],
            "vx": [-2 * MDPVehicle.SPEED_MAX, 2 * MDPVehicle.SPEED_MAX],
            "vy": [-2 * MDPVehicle.SPEED_MAX, 2 * MDPVehicle.SPEED_MAX]
        }
    for feature, f_range in self.features_range.items():
        if feature in df:
            df[feature] = utils.lmap(df[feature], [f_range[0], f_range[1]], [-1, 1])
            if self.clip:
                df[feature] = np.clip(df[feature], -1, 1)
    return df

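# A minimal check of the mapping performed above (the feature range is hypothetical,
# not the class default): the low end of the range maps to -1, the midpoint of the
# target interval falls where expected, and the high end maps to +1.
import numpy as np
import pandas as pd

df = pd.DataFrame({"x": [-150.0, 0.0, 75.0, 150.0]})
df["x"] = lmap(df["x"], [-150.0, 150.0], [-1, 1])
print(df["x"].tolist())  # [-1.0, 0.0, 0.5, 1.0]
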
def _reward(self, action: int) -> float: """ The vehicle is rewarded for driving with high speed on lanes to the right and avoiding collisions But an additional altruistic penalty is also suffered if any vehicle on the merging lane has a low speed. :param action: the action performed :return: the reward of the state-action transition """ action_reward = {0: self.LANE_CHANGE_REWARD, 1: 0, 2: self.LANE_CHANGE_REWARD, 3: 0, 4: 0} reward = self.COLLISION_REWARD * self.vehicle.crashed \ + self.RIGHT_LANE_REWARD * self.vehicle.lane_index[2] / 1 \ + self.HIGH_SPEED_REWARD * self.vehicle.speed_index / (self.vehicle.SPEED_COUNT - 1) # Altruistic penalty for vehicle in self.road.vehicles: if vehicle.lane_index == ("b", "c", 2) and isinstance(vehicle, ControlledVehicle): reward += self.MERGING_SPEED_REWARD * \ (vehicle.target_speed - vehicle.speed) / vehicle.target_speed return utils.lmap(action_reward[action] + reward, [self.COLLISION_REWARD + self.MERGING_SPEED_REWARD, self.HIGH_SPEED_REWARD + self.RIGHT_LANE_REWARD], [0, 1])
def _reward(self, action: int) -> float: """ The vehicle is rewarded for driving with high speed on lanes to the right and avoiding collisions But an additional altruistic penalty is also suffered if any vehicle on the merging lane has a low speed. :param action: the action performed :return: the reward of the state-action transition """ action_reward = { 0: self.config["lane_change_reward"], 1: 0, 2: self.config["lane_change_reward"], 3: 0, 4: 0 } reward = self.config["collision_reward"] * self.vehicle.crashed \ + self.config["right_lane_reward"] * self.vehicle.lane_index[2] / 1 \ + self.config["high_speed_reward"] * self.vehicle.speed_index / (self.vehicle.target_speeds.size - 1) # Altruistic penalty for vehicle in self.road.vehicles: if vehicle.lane_index == ("b", "c", 2) and isinstance( vehicle, ControlledVehicle): reward += self.config["merging_speed_reward"] * \ (vehicle.target_speed - vehicle.speed) / vehicle.target_speed return utils.lmap(action_reward[action] + reward, [ self.config["collision_reward"] + self.config["merging_speed_reward"], self.config["high_speed_reward"] + self.config["right_lane_reward"] ], [0, 1])
def _agent_reward_intersection(self, vehicle, action):
    """
    The reward is defined to foster driving at high speed, reaching the destination,
    and avoiding collisions.

    :param action: the last action performed
    :return: the corresponding reward
    """
    neighbours = self.env.road.network.all_side_lanes(vehicle.lane_index)
    reward_speed_range = [vehicle.SPEED_MIN, vehicle.SPEED_MAX]
    scaled_speed = utils.lmap(vehicle.speed, reward_speed_range, [0, 1])

    collision_reward_value = self.collision_reward * vehicle.crashed
    high_speed_reward_value = self.high_speed_reward * np.clip(scaled_speed, 0, 1)
    arrived_reward_value = self.arrived_reward \
        if self.env.has_arrived_intersection(vehicle) else 0
    # cooperative_reward_value_avg_speeds = self.cooperative_reward_value * \
    #     np.clip(self.avg_speeds, 0, 1)
    cooperative_reward_value_avg_speeds = 0

    reward = \
        collision_reward_value \
        + high_speed_reward_value \
        + cooperative_reward_value_avg_speeds \
        + arrived_reward_value
    if self.normalize_reward:
        reward = utils.lmap(reward, [
            self.collision_reward,
            self.high_speed_reward + self.arrived_reward
        ], [0, 1])
    reward = 0 if not vehicle.on_road else reward

    reward_info = {
        "reward_crashed": collision_reward_value,
        "reward_on_lane": 0,
        "reward_scaled_speed": high_speed_reward_value,
        "reward_lane_change": 0,
        "reward_distance": 0,
        "reward_avg_speeds": cooperative_reward_value_avg_speeds,
        "reward_has_merged": 0,
        "reward_distance_merge_vehicle": 0,
        # "normalized_timestep_reward": reward
        # "vehicle_id": vehicle.id
    }
    return reward, reward_info

def _reward(self, action: int) -> float:
    reward = self.COLLISION_REWARD * self.vehicle.crashed \
        + self.HIGH_SPEED_REWARD * self.vehicle.speed_index / max(self.vehicle.SPEED_COUNT - 1, 1) \
        + self.LANE_CHANGE_REWARD * (action in [0, 2])
    return utils.lmap(reward,
                      [self.COLLISION_REWARD + self.LANE_CHANGE_REWARD,
                       self.HIGH_SPEED_REWARD],
                      [0, 1])

def _agent_reward_simple(self, vehicle, action):
    """
    The reward is defined to foster driving at high speed, reaching the destination,
    and avoiding collisions.

    :param action: the last action performed
    :return: the corresponding reward
    """
    reward_speed_range = [vehicle.SPEED_MIN, vehicle.SPEED_MAX]
    scaled_speed = utils.lmap(vehicle.speed, reward_speed_range, [0, 1])

    collision_reward = -2
    high_speed_reward = 1
    destination_reward = 4

    collision_reward_value = collision_reward * vehicle.crashed
    high_speed_reward_value = high_speed_reward * np.clip(scaled_speed, 0, 1)
    final_steps = self.env.steps >= self.env.config["duration"] - 1
    arrive_destination = final_steps
    destination_reward_value = destination_reward * arrive_destination

    reward = \
        collision_reward_value \
        + high_speed_reward_value \
        + destination_reward_value
    if self.normalize_reward:
        reward = utils.lmap(reward,
                            [collision_reward, high_speed_reward + destination_reward],
                            [0, 1])
    # reward = 0 if not vehicle.on_road else reward

    reward_info = {
        "reward_crashed": collision_reward_value,
        "reward_on_lane": 0,
        "reward_scaled_speed": high_speed_reward_value,
        "reward_lane_change": 0,
        "reward_distance": 0,
        "reward_avg_speeds": 0,
        "reward_has_merged": 0,
        "reward_distance_merge_vehicle": 0,
        # "normalized_timestep_reward": reward
        # "vehicle_id": vehicle.id
    }
    return reward, reward_info

def _reward(self, action: int) -> float:
    lane_change = action == 0 or action == 2
    reward = self.COLLISION_REWARD * self.vehicle.crashed \
        + self.HIGH_SPEED_REWARD \
        * MDPVehicle.get_speed_index(self.vehicle) / max(MDPVehicle.SPEED_COUNT - 1, 1) \
        + self.LANE_CHANGE_REWARD * lane_change
    return utils.lmap(reward,
                      [self.COLLISION_REWARD + self.LANE_CHANGE_REWARD,
                       self.HIGH_SPEED_REWARD],
                      [0, 1])

def _reward(self, action: int) -> float:
    lane_change = action == 0 or action == 2
    reward = self.config["collision_reward"] * self.vehicle.crashed \
        + self.config["high_speed_reward"] \
        * MDPVehicle.get_speed_index(self.vehicle) / max(MDPVehicle.SPEED_COUNT - 1, 1) \
        + self.config["lane_change_reward"] * lane_change
    return utils.lmap(reward,
                      [self.config["collision_reward"] + self.config["lane_change_reward"],
                       self.config["high_speed_reward"]],
                      [0, 1])

def _agent_reward(self, action: int, vehicle: Vehicle) -> float:
    reward = self.config["collision_reward"] * vehicle.crashed
    # + self.HIGH_SPEED_REWARD * (vehicle.speed_index == vehicle.SPEED_COUNT - 1)
    reward = self.ARRIVED_REWARD if self.has_arrived(vehicle) else reward
    if self.config["normalize_reward"]:
        reward = utils.lmap(reward,
                            [self.config["collision_reward"], self.ARRIVED_REWARD],
                            [0, 1])
    return reward

def _reward(self, action: int) -> float: """ The vehicle is rewarded for driving with high speed and collision avoidance. :param action: the action performed :return: the reward of the state-action transition """ neighbours = self.road.network.all_side_lanes(self.vehicle.lane_index) lane = self.vehicle.lane_index[2] scaled_speed = utils.lmap(self.vehicle.speed, self.config["reward_speed_range"], [0, 1]) reward = \ + self.config["collision_reward"] * self.vehicle.crashed \ + self.config["left_lane_reward"] * lane / max(len(neighbours) - 1, 1) \ + self.config["high_speed_reward"] * np.clip(scaled_speed, 0, 1) reward = utils.lmap(reward, [self.config["collision_reward"], self.config["high_speed_reward"] + self.config["left_lane_reward"]], [0, 1]) reward = 0 if not self.vehicle.on_road else reward return reward
def get_hat(env, z):
    p = env.vehicle.position
    i_h = env.vehicle.heading
    i_s = env.vehicle.speed
    road_r = env.config["radius"]
    temp_n = [
        lmap(p[0], [-(road_r + 4), (road_r + 4)], [-1, 1]),
        lmap(p[1], [0, 2 * (road_r + 4)], [-1, 1]),
        lmap(i_h, [-2 * np.pi, 2 * np.pi], [-1, 1]),
        lmap(i_s, [-max_v, max_v], [-1, 1])
    ]
    x_road, y_road, x_road_n, y_road_n = env.vehicle.target_lane_position(p, road_r)

    s0 = temp_n
    s0 = np.array(s0)
    s0 = torch.Tensor(s0)
    s0 = s0.unsqueeze(0)

    ss = x_road_n + y_road_n
    ss = np.array(ss)
    ss = torch.Tensor(ss)
    ss = ss.unsqueeze(0)

    label = z
    s_hat, _, _, _, _ = clpp(s0, ss, label, em)
    print("ok")
    u_hat = clde(s0, ss, hidden)

    s_hat = s_hat.squeeze(0)
    u_hat = u_hat.squeeze(0)
    s0 = s0.squeeze(0)
    s_hat = s_hat.detach().cpu().numpy()
    u_hat = u_hat.detach().cpu().numpy()
    s0 = s0.detach().cpu().numpy()

    x_f = s_hat.reshape(4, 50)[:, -1]
    print(x_f)
    # print("original terminal state: ", x_f)
    # x_f_local_x, x_f_local_y = env.road.network.get_lane(v_lane_id).local_coordinates(np.array([x_f[0], x_f[1]]))
    # x_f_local_y = np.clip(x_f_local_y, -4, 4)
    # x_f_xy = env.road.network.get_lane(v_lane_id).position(x_f_local_x, x_f_local_y)
    # x_f[0] = x_f_xy[0]
    # x_f[1] = x_f_xy[1]
    # x_f[2] = env.road.network.get_lane(v_lane_id).heading_at(x_f_xy[0])
    # print("current terminal state: ", x_f)
    return s0, ss, s_hat, u_hat, x_f

def _reward(self, action) -> float:
    """
    The vehicle is rewarded for driving with high speed on lanes to the right and
    avoiding collisions, but an additional altruistic penalty is also suffered if
    any vehicle on the merging lane has a low speed.

    :param action: the action performed
    :return: the reward of the state-action transition
    """
    '''
    action_reward = {0: self.LANE_CHANGE_REWARD,
                     1: 0,
                     2: self.LANE_CHANGE_REWARD,
                     3: 0,
                     4: 0}
    '''
    if self.vehicle.speed < 0:
        return self.COLLISION_REWARD * self.vehicle.crashed - self.HIGH_SPEED_REWARD

    current_lane = self.vehicle.lane
    longi, lat = current_lane.local_coordinates(self.vehicle.position)
    heading_reward = 1 - np.abs(current_lane.heading_at(longi) - self.vehicle.heading)
    lane_center_reward = 1 - (lat / current_lane.width) ** 2
    scaled_speed = utils.lmap(self.vehicle.speed, [20, 30], [0, 1])
    reward = self.COLLISION_REWARD * self.vehicle.crashed \
        + self.RIGHT_LANE_REWARD * self.vehicle.lane_index[2] / 1 \
        + self.HIGH_SPEED_REWARD * np.clip(scaled_speed, -1, 1) \
        + self.LANE_HEADING_REWARD * heading_reward \
        + self.LANE_CENTERING_REWARD * lane_center_reward
    # + self.HIGH_SPEED_REWARD * self.vehicle.speed_index / (self.vehicle.SPEED_COUNT - 1)

    # Altruistic penalty
    for vehicle in self.road.vehicles:
        if vehicle.lane_index == ("b", "c", 2) and isinstance(vehicle, ControlledVehicle):
            reward += self.MERGING_SPEED_REWARD * \
                (vehicle.target_speed - vehicle.speed) / vehicle.target_speed

    return utils.lmap(reward,
                      [self.COLLISION_REWARD + self.MERGING_SPEED_REWARD,
                       self.HIGH_SPEED_REWARD + self.RIGHT_LANE_REWARD
                       + self.LANE_HEADING_REWARD + self.LANE_CENTERING_REWARD],
                      [0, 1])

def _agent_reward_roundabout(self, vehicle, action):
    neighbours = self.env.road.network.all_side_lanes(vehicle.lane_index)
    lane_change = action == 0 or action == 2
    reward_speed_range = [vehicle.SPEED_MIN, vehicle.SPEED_MAX]
    scaled_speed = utils.lmap(vehicle.speed, reward_speed_range, [0, 1])

    collision_reward_value = self.collision_reward * vehicle.crashed
    high_speed_reward_value = self.high_speed_reward * np.clip(scaled_speed, 0, 1)
    lane_change_reward_value = self.lane_change_reward * lane_change
    # cooperative_reward_value_avg_speeds = self.cooperative_reward_value * \
    #     np.clip(self.avg_speeds, 0, 1)
    cooperative_reward_value_avg_speeds = 0

    reward = \
        collision_reward_value \
        + high_speed_reward_value \
        + cooperative_reward_value_avg_speeds \
        + lane_change_reward_value
    if self.normalize_reward:
        reward = utils.lmap(reward, [
            self.collision_reward + self.lane_change_reward,
            self.high_speed_reward
        ], [0, 1])
    # reward = 0 if not vehicle.on_road else reward

    reward_info = {
        "reward_crashed": collision_reward_value,
        "reward_on_lane": 0,
        "reward_scaled_speed": high_speed_reward_value,
        "reward_lane_change": lane_change_reward_value,
        "reward_distance": 0,
        "reward_avg_speeds": cooperative_reward_value_avg_speeds,
        "reward_has_merged": 0,
        "reward_distance_merge_vehicle": 0,
        # "normalized_timestep_reward": reward
        # "vehicle_id": vehicle.id
    }
    return reward, reward_info

def _reward(self, action: np.ndarray) -> float:
    _, lateral = self.vehicle.lane.local_coordinates(self.vehicle.position)
    lane_centering_reward = 1 / (1 + self.config["lane_centering_cost"] * lateral ** 2)
    action_reward = self.config["action_reward"] * np.linalg.norm(action)
    reward = lane_centering_reward \
        + action_reward \
        + self.config["collision_reward"] * self.vehicle.crashed
    reward = reward if self.vehicle.on_road else self.config["collision_reward"]
    return utils.lmap(reward, [self.config["collision_reward"], 1], [0, 1])

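# To get a feel for the lane-centering term above, evaluate it at a few lateral
# offsets (lane_centering_cost is an illustrative value, not a config default):
# the reward peaks at 1 on the lane center and decays quadratically with offset.
lane_centering_cost = 4.0  # hypothetical config value
for lateral in [0.0, 0.5, 1.0, 2.0]:
    print(lateral, 1 / (1 + lane_centering_cost * lateral ** 2))
# 0.0 -> 1.0, 0.5 -> 0.5, 1.0 -> 0.2, 2.0 -> ~0.059
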
def _reward(self, action: Action) -> float: """ The reward is defined to foster driving at high speed, on the rightmost lanes, and to avoid collisions. :param action: the last action performed :return: the corresponding reward """ lane_change = 0 if action == 2 or action == 0 else 1 neighbours = self.env.road.network.all_side_lanes( self.env.vehicle.lane_index) lane = self.env.vehicle.target_lane_index[2] if isinstance(self.env.vehicle, ControlledVehicle) \ else self.env.vehicle.lane_index[2] # if self.env.vehicle.crashed: # reward = self.env.config["collision_reward"]*100 # else: # reward = 10 # + self.env.RIGHT_LANE_REWARD * lane*10 / max(len(neighbours) - 1, 1) \ scaled_speed = utils.lmap(self.env.vehicle.speed, self.env.config["reward_speed_range"], [0, 1]) # print(scaled_speed, self.env.config["reward_speed_range"]) # print(scaled_speed) # print(self.env.RIGHT_LANE_REWARD*lane) # print(self.env.config["collision_reward"]) crashed = -1 if self.env.vehicle.crashed else 1 reward = \ + 0.05*lane_change \ + 0.2 * lane / max(len(neighbours) - 1, 1) \ + 0.65 * scaled_speed \ + crashed*1.1 # print(reward) # reward = self.env.config["collision_reward"] if self.env.vehicle.crashed else reward reward = utils.lmap(reward, [-2, 2], [-1, 1]) # print(reward) # reward = utils.lmap(reward, # [self.env.config["collision_reward"]*5, (self.env.HIGH_SPEED_REWARD + self.env.RIGHT_LANE_REWARD)*5], # [0, 5]) # print(reward) # print(self.env.vehicle.speed, reward) reward = 0 if not self.env.vehicle.on_road else reward return reward
def target_lane_position(self, initial_position, road_r):
    target_lane = self.road.network.get_lane(self.target_lane_index)
    lane_coords = target_lane.local_coordinates(initial_position)
    # x_r = lane_coords[0] + np.arange(0, 301, 10)
    # lane_x = [target_lane.position(item, 0)[0] for item in x_r]
    # lane_y = [0 for item in x_r]
    x_r = lane_coords[0]
    y_r = 0
    lane_x = []
    lane_x_n = []
    lane_y = []
    lane_y_n = []
    for item in np.arange(0, 101, 10):
        xx, yy = target_lane.position(x_r + item, 0)
        xx_n = lmap(xx, [-(road_r + 4), (road_r + 4)], [-1, 1])
        yy_n = lmap(yy, [0, 2 * (road_r + 4)], [-1, 1])
        lane_x.append(xx)
        lane_x_n.append(xx_n)
        lane_y.append(yy)
        lane_y_n.append(yy_n)
    return lane_x, lane_y, lane_x_n, lane_y_n

def _reward(self, action: Action) -> float: """ The reward is defined to foster driving at high speed, on the rightmost lanes, and to avoid collisions. :param action: the last action performed :return: the corresponding reward """ lane_index = self.vehicle.target_lane_index if isinstance(self.vehicle, ControlledVehicle) \ else self.vehicle.lane_index scaled_speed = utils.lmap(self.vehicle.speed, self.config["reward_speed_range"], [0, 1]) reward = self.config["collision_reward"] * self.vehicle.crashed \ + self.config["goal_reward"] * self._is_success() \ + self.config["high_speed_reward"] * np.clip(scaled_speed, 0, 1) \ + self.config["right_lane_reward"] * lane_index[-1] reward = utils.lmap( reward, [self.config["collision_reward"], self.config["goal_reward"]], [0, 1]) reward = np.clip(reward, 0, 1) return reward
def _reward(self, action: Action) -> float: """ The reward is defined to foster driving at high speed, on the rightmost lanes, and to avoid collisions. :param action: the last action performed :return: the corresponding reward """ reward = utils.lmap(self.vehicle.speed, self.config["reward_speed_range"], [0, 1]) if self.vehicle.crashed: reward = self.config["collision_reward"] * reward return reward
def get_avg_speeds(self, sympathy_flag=False, scaled_speed_flag=False):
    speeds = []
    if sympathy_flag:
        for vehicle in self.env.road.vehicles:
            if scaled_speed_flag:
                scaled_speed = utils.lmap(vehicle.speed, self.reward_speed_range, [0, 1])
            else:
                scaled_speed = vehicle.speed
            speeds.append(scaled_speed)
    else:
        for vehicle in self.env.controlled_vehicles:
            if scaled_speed_flag:
                scaled_speed = utils.lmap(vehicle.speed, self.reward_speed_range, [0, 1])
            else:
                scaled_speed = vehicle.speed
            speeds.append(scaled_speed)
    return np.mean(speeds)

def get_sum_of_speeds(self, sympathy_flag=False, scaled_speed_flag=False):
    sum_of_speeds = 0
    if sympathy_flag:
        for vehicle in self.env.road.vehicles:
            if scaled_speed_flag:
                scaled_speed = utils.lmap(vehicle.speed, self.reward_speed_range, [0, 1])
            else:
                scaled_speed = vehicle.speed
            sum_of_speeds += scaled_speed
    else:
        for vehicle in self.env.controlled_vehicles:
            if scaled_speed_flag:
                scaled_speed = utils.lmap(vehicle.speed, self.reward_speed_range, [0, 1])
            else:
                scaled_speed = vehicle.speed
            sum_of_speeds += scaled_speed
    return sum_of_speeds

def normalize_obs(self, observation: np.ndarray) -> np.ndarray:
    """
    Normalize the observation values.

    :param ndarray observation: observation data
    """
    if self.features_range is None:
        side_lanes = self.env.road.network.all_side_lanes(self.env.vehicle.lane_index)
        self.features_range = {
            "lane_index": [0, len(side_lanes) - 1],
            "x": [-5.0 * MDPVehicle.SPEED_MAX, 5.0 * MDPVehicle.SPEED_MAX],
            "vx": [-2 * MDPVehicle.SPEED_MAX, 2 * MDPVehicle.SPEED_MAX]
        }
    observation[0, 0] = utils.lmap(observation[0, 0],
                                   self.features_range['lane_index'], [0, 1])
    observation[1:, 0] = utils.lmap(observation[1:, 0],
                                    self.features_range['x'], [-1, 1])
    observation[:, 1] = utils.lmap(observation[:, 1],
                                   self.features_range['vx'], [-1, 1])
    observation = np.clip(observation, -1, 1)
    return observation