def world3(flag=False):
    dyn = dynamics.CarDynamics(0.1)
    world = World()
    clane = lane.StraightLane([0., -1.], [0., 1.], 0.13)
    world.lanes += [clane, clane.shifted(1), clane.shifted(-1)]
    world.roads += [clane]
    world.fences += [clane.shifted(2), clane.shifted(-2),
                     clane.shifted(2.5), clane.shifted(-2.5)]
    world.cars.append(car.UserControlledCar(dyn, [0., 0., math.pi/2., 0.3], color='red'))
    world.cars.append(car.NestedOptimizerCar(dyn, [0., 0.3, math.pi/2., 0.3], color='yellow'))
    world.cars[1].human = world.cars[0]
    world.cars[0].bounds = [(-3., 3.), (-1., 1.)]
    if flag:
        world.cars[0].follow = world.cars[1].traj_h
    r_h = (world.simple_reward([world.cars[1].traj])
           + 100. * feature.bounded_control(world.cars[0].bounds))
    @feature.feature
    def human(t, x, u):
        return (world.cars[1].traj_h.x[t][0]) * 10
    r_r = 300. * human + world.simple_reward(world.cars[1], speed=0.5)
    world.cars[1].rewards = (r_h, r_r)
    #world.objects.append(Object('firetruck', [0., 0.7]))
    return world
def get_control_reward(self, fw):
    """Compute the control reward."""
    control_r = self.w_control * feature.control()
    bounded_control_r = (self.w_bounded_control *
                         feature.bounded_control(fw, self.car_control_bounds))
    return control_r + bounded_control_r
def world1(flag=False):
    dyn = dynamics.CarDynamics(0.1)
    world = World()
    clane = lane.StraightLane([0., -1.], [0., 1.], 0.13)
    world.lanes += [clane, clane.shifted(1), clane.shifted(-1)]
    world.roads += [clane]
    world.fences += [clane.shifted(2), clane.shifted(-2)]
    world.cars.append(car.UserControlledCar(dyn, [-0.13, 0., math.pi/2., 0.3], color='red'))
    world.cars.append(car.NestedOptimizerCar(dyn, [0.0, 0.5, math.pi/2., 0.3], color='yellow'))
    world.cars[1].human = world.cars[0]
    if flag:
        world.cars[0].follow = world.cars[1].traj_h
    r_h = (world.simple_reward([world.cars[1].traj],
                               speed_import=.2 if flag else 1.,
                               speed=0.8 if flag else 1.)
           + 100. * feature.bounded_control(world.cars[0].bounds))
    @feature.feature
    def human_speed(t, x, u):
        return -world.cars[1].traj_h.x[t][3]**2
    r_r = 300. * human_speed + world.simple_reward(world.cars[1], speed=0.5)
    if flag:
        world.cars[0].follow = world.cars[1].traj_h
    world.cars[1].rewards = (r_h, r_r)
    #world.objects.append(Object('cone', [0., 1.8]))
    return world
def assign_nest_goal(self, acting_car, passive_car, acting_car_reward):
    if passive_car is None:
        acting_car.nested = False
        acting_car.simple_reward = acting_car_reward
    else:
        acting_car.nested = True
        acting_car.human = passive_car
        # create a list of all the trajectories that the human is doing collision avoidance with.
        # use true (not linear) trajectory for the robot car, since it is a Stackelberg game.
        # TODO: only use the cars close to the human
        trajs_to_avoid = []
        for ca in self.cars:
            if ca is acting_car:
                trajs_to_avoid.append(ca.traj)
            elif ca is not passive_car:
                trajs_to_avoid.append(ca.linear)
        hum_reward = (self.simple_reward(trajs_to_avoid)
                      + 100. * feature.bounded_control(passive_car.bounds))
        acting_car.nested_rewards = (hum_reward, acting_car_reward)
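# A minimal usage sketch for assign_nest_goal above (hypothetical call site, not part of the
# original code; `self` is the world/manager object and the reward is built with simple_reward
# as elsewhere in this file). Passing passive_car=None falls back to a plain, non-nested reward.
#
#   robot_reward = self.simple_reward(robot_car, speed=0.5)
#   self.assign_nest_goal(acting_car=robot_car, passive_car=human_car,
#                         acting_car_reward=robot_reward)
#   # afterwards: robot_car.nested is True, robot_car.human is human_car, and
#   # robot_car.nested_rewards holds (human_reward, robot_reward) for the Stackelberg game.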
def world0():
    dyn = dynamics.CarDynamics(0.1)
    world = World()
    clane = lane.StraightLane([0., -1.], [0., 1.], 0.13)
    world.lanes += [clane, clane.shifted(1), clane.shifted(-1)]
    world.roads += [clane]
    world.fences += [clane.shifted(2), clane.shifted(-2)]
    world.cars.append(car.UserControlledCar(dyn, [-0.13, 0., math.pi/2., 0.3], color='red'))
    world.cars.append(car.NestedOptimizerCar(dyn, [0.0, 0.5, math.pi/2., 0.3], color='yellow'))
    world.cars[1].human = world.cars[0]
    r_h = (world.simple_reward([world.cars[1].traj])
           + 100. * feature.bounded_control(world.cars[0].bounds))
    @feature.feature
    def human_speed(t, x, u):
        return -world.cars[1].traj_h.x[t][3]**2
    r_r = world.simple_reward(world.cars[1], speed=0.5)
    world.cars[1].rewards = (r_h, r_r)
    return world
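# Sketch of the reward-shaping pattern used by the world builders above (illustrative, not part
# of the original scenarios): extra objectives are @feature.feature functions of (t, x, u) that
# are scaled and added to world.simple_reward, exactly as 300.*human_speed or 10*goal are below.
# The stay_centered feature here is hypothetical; x[0] is the lateral position in these worlds.
def example_custom_reward(world):
    @feature.feature
    def stay_centered(t, x, u):
        return -x[0]**2  # penalize lateral offset from the center lane at x = 0

    return 50. * stay_centered + world.simple_reward(world.cars[1], speed=0.5)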
def world6(know_model=True):
    dyn = dynamics.CarDynamics(0.1)
    world = World()
    clane = lane.StraightLane([0., -1.], [0., 1.], 0.13)
    world.lanes += [clane, clane.shifted(1), clane.shifted(-1)]
    world.roads += [clane]
    world.fences += [clane.shifted(2), clane.shifted(-2),
                     clane.shifted(2.5), clane.shifted(-2.5)]
    world.cars.append(car.SimpleOptimizerCar(dyn, [-0.13, 0., math.pi/2., 0.5], color='red'))
    if know_model:
        world.cars.append(car.NestedOptimizerCar(dyn, [0., 0.05, math.pi/2., 0.5], color='yellow'))
    else:
        world.cars.append(car.SimpleOptimizerCar(dyn, [0., 0.05, math.pi/2., 0.5], color='yellow'))
    world.cars[0].reward = world.simple_reward(world.cars[0], speed=0.6)
    world.cars[0].default_u = np.asarray([0., 1.])
    @feature.feature
    def goal(t, x, u):
        return -(10.*(x[0]+0.13)**2 + 0.5*(x[1]-2.)**2)
    if know_model:
        world.cars[1].human = world.cars[0]
        r_h = (world.simple_reward([world.cars[1].traj], speed=0.6)
               + 100.*feature.bounded_control(world.cars[0].bounds))
        r_r = 10*goal + world.simple_reward([world.cars[1].traj_h], speed=0.5)
        world.cars[1].rewards = (r_h, r_r)
    else:
        r = 10*goal + world.simple_reward([world.cars[0].linear], speed=0.5)
        world.cars[1].reward = r
    return world
def world4(flag=False):
    dyn = dynamics.CarDynamics(0.1)
    world = World()
    vlane = lane.StraightLane([0., -1.], [0., 1.], 0.13)
    hlane = lane.StraightLane([-1., 0.], [1., 0.], 0.13)
    world.lanes += [vlane, hlane]
    world.fences += [hlane.shifted(-1), hlane.shifted(1)]
    world.cars.append(car.UserControlledCar(dyn, [0., -.3, math.pi/2., 0.0], color='red'))
    world.cars.append(car.NestedOptimizerCar(dyn, [-0.3, 0., 0., 0.], color='yellow'))
    world.cars[1].human = world.cars[0]
    world.cars[0].bounds = [(-3., 3.), (-2., 2.)]
    if flag:
        world.cars[0].follow = world.cars[1].traj_h
    world.cars[1].bounds = [(-3., 3.), (-2., 2.)]
    @feature.feature
    def horizontal(t, x, u):
        # isn't x[2] the heading, not the y location?
        return -x[2]**2
    r_h = (world.simple_reward([world.cars[1].traj],
                               lanes=[vlane],
                               fences=[vlane.shifted(-1), vlane.shifted(1)]*2)
           + 100. * feature.bounded_control(world.cars[0].bounds))
    @feature.feature
    def human(t, x, u):
        return -tt.exp(-10*(world.cars[1].traj_h.x[t][1]-0.13)/0.1)
    r_r = (human*10. + horizontal*30.
           + world.simple_reward(world.cars[1],
                                 lanes=[hlane]*3,
                                 fences=[hlane.shifted(-1), hlane.shifted(1)]*3
                                        + [hlane.shifted(-1.5), hlane.shifted(1.5)]*2,
                                 speed=0.9))
    world.cars[1].rewards = (r_h, r_r)
    return world
def reward(self, reward):
    # build the reward from the input plus the reward from bounded_control;
    # bounded_control uses the collision boxes in the world
    self._reward = reward + 100. * feature.bounded_control(self.bounds)
    self.optimizer = None  # clear the optimizer so it is rebuilt with the new reward
def world_kex1(know_model=True):
    start_human = -0.13
    start_robot = -0.00
    dyn = dynamics.CarDynamics(0.1)
    world = World()
    clane = lane.StraightLane([0., -1.], [0., 1.], 0.13)
    world.lanes += [clane, clane.shifted(1), clane.shifted(-1)]
    world.roads += [clane]
    world.fences += [clane.shifted(2), clane.shifted(-2),
                     clane.shifted(2.5), clane.shifted(-2.5)]
    #world.cars.append(car.SimpleOptimizerCar(dyn, [start_human, 0., math.pi/2., 0.5], color='red'))  # red car is the human
    world.cars.append(car.NestedOptimizerCar(dyn, [start_human, 0., math.pi/2., 0.5],
                                             color='red'))  # red car is the human
    if know_model:
        # yellow car is the robot that uses the nested optimizer to find its way
        world.cars.append(car.NestedOptimizerCar(dyn, [start_robot, 0.0, math.pi/2., 0.5],
                                                 color='yellow'))
    else:
        world.cars.append(car.SimpleOptimizerCar(dyn, [start_robot, 0.0, math.pi/2., 0.5],
                                                 color='yellow'))
    world.cars[0].reward = world.simple_reward(world.cars[0], speed=0.6)
    world.cars[0].default_u = np.asarray([0., 1.])

    @feature.feature
    def goal(t, x, u):
        # doesn't need this in the know_model case
        k = -(10. * (x[0] + 0.13)**2 + 0.5 * (x[1] - 2.)**2)
        #ASK Elis
        #print("--------", x[0].auto_name)
        #print("--------", x[1].auto_name)
        #exit()
        return k

    # object --------------
    world.cars.append(car.SimpleOptimizerCar(dyn, [-0.13, 0.5, math.pi/2., 0.0],
                                             color='blue'))  # blue car is the obstacle
    #world.cars.append(car.NestedOptimizerCar(dyn, [-0.13, 0.5, math.pi/2., 0.0], color='blue'))  # blue car is the obstacle
    #print(world.cars)
    #exit()
    world.cars[2].reward = world.simple_reward(world.cars[2], speed=0.0)
    #world.cars[2].reward = 1
    world.cars[2].default_u = np.asarray([0., 0.])
    world.cars[2].movable = False
    # ------------------

    if know_model:
        world.cars[1].human = world.cars[0]  # [1] is the robot; assign so the robot knows who the human is
        world.cars[1].obstacle = world.cars[2]
        world.cars[0].obstacle = world.cars[2]
        world.cars[0].human = world.cars[1]
        # reward with respect to the robot trajectory: world.cars[1].traj
        r_h = (world.simple_reward([world.cars[1].traj], speed=0.5)
               + 100. * feature.bounded_control(world.cars[0].bounds)
               + 100. * feature.bounded_control(world.cars[2].bounds))
        #r_r = 10*goal + world.simple_reward([world.cars[1].traj_h], speed=0.5)
        r_r = (world.simple_reward([world.cars[1].traj_h], speed=0.5)
               + 100. * feature.bounded_control(world.cars[2].bounds))
        r_h2 = (world.simple_reward([world.cars[1].traj_h], speed=0.5)
                + 100. * feature.bounded_control(world.cars[0].bounds)
                + 100. * feature.bounded_control(world.cars[2].bounds))
        #r_r = 10*goal + world.simple_reward([world.cars[1].traj_h], speed=0.5)
        r_r2 = (world.simple_reward([world.cars[1].traj], speed=0.5)
                + 100. * feature.bounded_control(world.cars[2].bounds))
        #r_obj = world.simple_reward([world.cars[1].traj_h], speed=0.0)
        world.cars[1].rewards = (r_h, r_r)  # ADD: r_object
        world.cars[0].rewards = (r_h2, r_r2)  # (reward to optimize on, the car)
        #print(r_h)
        #print(r_r)
        #print(world.cars[1].rewards)
        #exit()
    else:
        r = 10 * goal + world.simple_reward([world.cars[0].linear], speed=0.5)
        world.cars[1].reward = r
    #world.cars.append(static_obj.SimpleOptimizerCar(dyn, [-0.13, 0.5, math.pi/2., 0.0], color='blue'))  # blue car is the obstacle
    return world
def world_kex_old(know_model=True):
    dyn = dynamics.CarDynamics2(0.1)
    #dyn.dt = 1.0
    #dyn.fiction = 0.0
    world = World()
    # clane = lane.StraightLane([0., -1.], [0., 1.], 0.13)
    # world.lanes += [clane, clane.shifted(1), clane.shifted(-1)]
    # world.roads += [clane]
    # world.fences += [clane.shifted(2), clane.shifted(-2), clane.shifted(2.5), clane.shifted(-2.5)]
    clane = lane.StraightLane([0., -1.], [0., 1.], 0.13)
    world.lanes += [clane, clane.shifted(1)]
    #world.roads += [clane, clane.shifted(1)]
    world.fences += [clane.shifted(2), clane.shifted(-1)]

    human_is_follower = False

    # CAR 0 = Human
    # CAR 1 = Robot
    # CAR 2 = Obstacle
    # IMPORTANT: the follower must be created first.
    # Depending on whether our human is the follower or the leader, we create the cars differently.
    if human_is_follower:
        # Create the cars -----
        # Human car
        #world.cars.append(car.NestedOptimizerCarFollower(dyn, [-0.13, 0.0, math.pi/2., 0.5], color='red', T=3))
        world.cars.append(car.NestedOptimizerCarFollower2(dyn, [-0.13, 0.0, math.pi/2., 0.5],
                                                          color='red', T=3))
        # Robot car
        world.cars.append(car.NestedOptimizerCarLeader(dyn, [-0., 0., math.pi/2., 0.5],
                                                       color='yellow', T=3))
        #world.cars[0].leader = world.cars[1]
        #world.cars[0].leader1 = world.cars[1]
        # --------------------
    else:
        # Create the cars -----
        # Human car
        world.cars.append(car.NestedOptimizerCarFollower2(dyn, [0., 0., math.pi/2., 0.5],
                                                          color='yellow', T=3))
        world.cars.append(car.NestedOptimizerCarLeader(dyn, [-0.13, 0.0, math.pi/2., 0.5],
                                                       color='red', T=3))
        # Robot car
        #world.cars.append(car.NestedOptimizerCarFollower(dyn, [0., 0., math.pi/2., 0.5], color='yellow', T=3))
        #world.cars[1].leader = world.cars[0]
        #world.cars[1].leader1 = world.cars[0]
        # --------------------

    # Obstacle car
    #world.cars.append(car.SimpleOptimizerCar(dyn, [-0.13, 0.5, math.pi/2., 0.5], color='blue'))  # doesn't work because it can't force the car to turn around
    world.cars.append(car.SimpleOptimizerCar(dyn, [-0.13, 2, math.pi/4., 0.], color='blue'))
    # --------------------

    # Reward and default for the human ---
    # speed did not change here
    # world.cars[0].reward = world.simple_reward(world.cars[0], speed=0.6)
    world.cars[0].default_u = np.asarray([0., 1.])
    # ------------------------------------
    # Reward and default for the robot ---
    # world.cars[1].reward = world.simple_reward(world.cars[1], speed=0.6)
    world.cars[1].default_u = np.asarray([0., 1.])
    # ------------------------------------
    # Reward and default for the obstacle ---
    world.cars[2].reward = world.simple_reward(world.cars[2], speed=0.)
    world.cars[2].default_u = np.asarray([0., 0.])
    world.cars[2].movable = False
    # ------------------------------------

    # CAR 0 = Human
    # CAR 1 = Robot
    # CAR 2 = Obstacle
    if human_is_follower:
        world.cars[0].leader = world.cars[1]
        world.cars[0].obstacle = world.cars[2]
        world.cars[1].follower = world.cars[0]
        world.cars[1].obstacle = world.cars[2]
    else:
        world.cars[1].follower = world.cars[0]
        world.cars[1].obstacle = world.cars[2]
        world.cars[0].leader = world.cars[1]
        world.cars[0].obstacle = world.cars[2]

    # CAR 0 = Human
    # CAR 1 = Robot
    # CAR 2 = Obstacle
    # TODO: Fix this part, unsure how to set up world.simple_reward.
    # Computes the dynamic (changing) rewards for the cars depending on their speed and
    # collisions with other cars and obstacles.
    # TODO: this is what is wrong, they need to be the same
    # TODO: cars don't want to slow down, find a solution that works
    if human_is_follower:
        # HUMAN
        #r_h = world.simple_reward([world.cars[1].traj], speed=0.6)+100.*feature.bounded_control(world.cars[0].bounds)+world.simple_reward(world.cars[0].traj_o, speed=0.)  # Reward for the human
        r_h = (world.simple_reward([world.cars[1].traj], speed=0.80)
               + 100. * feature.bounded_control(world.cars[0].bounds)
               + 1 * world.simple_reward(world.cars[1].traj_o, speed=0.80))  # Reward for the human
        # ROBOT
        r_r = (world.simple_reward([world.cars[1].traj_h], speed=0.8)
               + 100. * feature.bounded_control(world.cars[1].bounds)
               + 1 * world.simple_reward(world.cars[1].traj_o, speed=0.8))  # Reward for the robot
    else:
        # HUMAN
        r_h = (world.simple_reward([world.cars[1].traj_h], speed=0.8)
               + 100. * feature.bounded_control(world.cars[0].bounds)
               + 1 * world.simple_reward(world.cars[1].traj_o, speed=0.8))  # Reward for the human
        # ROBOT
        r_r = (world.simple_reward([world.cars[1].traj], speed=0.8)
               + 100. * feature.bounded_control(world.cars[1].bounds)
               + 1 * world.simple_reward(world.cars[1].traj_o, speed=0.8))  # Reward for the robot

    r_o = 1. * feature.bounded_control(world.cars[2].bounds)
    #r_o = world.simple_reward([world.cars[0].traj_o], speed=0.)

    world.cars[0].rewards = (r_r, r_h, r_o)
    world.cars[1].rewards = (r_h, r_r, r_o)
    # ------------------------------------

    return world
def move_humans_from_lane(self, lane_idx):
    def move_from_lane(rob_car, desired_xloc, width=0.5, lane_width=lane.DEFAULT_WIDTH):
        @feature.feature
        def f(t, x, u):
            return tt.exp(-0.5 * ((rob_car.traj_h.x[t][0] - desired_xloc)**2) /
                          (width**2 * lane_width * lane_width / 4.))
        return f

    # collect the cars in the lane of interest
    lane_car_idcs = []
    for i in range(len(self.cars)):
        if (self.veh_lanes[self.cars[i]] == lane_idx):
            lane_car_idcs.append(i)

    # Order the cars by position:
    # create a list of the vehicle y-positions in the mixed lane
    mixed_veh_ypos = [self.cars[i].x[1] for i in lane_car_idcs]
    # now sort these indices according to vehicle y-position
    sorted_idcs = np.argsort(np.array(mixed_veh_ypos))

    # Now go through these cars and see if there is a human vehicle behind them.
    # If so, get the human to leave the lane. If not, if there is a robot in front,
    # platoon with them.
    for i in range(len(sorted_idcs)):
        acting_car = self.cars[lane_car_idcs[sorted_idcs[i]]]
        # only assign goals if the car is a robot
        if acting_car.iamrobot:
            # If this isn't the last car in the lane, check behind it
            if (i != 0):
                if not self.cars[lane_car_idcs[sorted_idcs[i - 1]]].iamrobot:
                    # then kick the car out
                    acting_car.nested = True
                    acting_car.human = self.cars[lane_car_idcs[sorted_idcs[i - 1]]]
                    rob_reward = acting_car.baseline_reward + 400. * move_from_lane(
                        acting_car,
                        desired_xloc=self.lane_centers[self.veh_lanes[acting_car] + 1])
                    #TODO: add speed into here! maybe assume that the current speed is the desired speed??
                    # create a list of all the trajectories that the human is doing collision avoidance with.
                    # use true (not linear) trajectory for the robot car, since it is a Stackelberg game.
                    # TODO: only use the cars close to the human
                    trajs_to_avoid = []
                    for ca in self.cars:
                        if ca is acting_car:
                            trajs_to_avoid.append(ca.traj)
                        elif ca is not self.cars[lane_car_idcs[sorted_idcs[i - 1]]]:
                            trajs_to_avoid.append(ca.linear)
                    hum_reward = (self.simple_reward(trajs_to_avoid)
                                  + 100. * feature.bounded_control(acting_car.human.bounds))
                    acting_car.nested_rewards = (hum_reward, rob_reward)
                # if the car in front of it is a robot, then be free to platoon (since the car behind it is a robot too)
                # check that it is not the front-most car in the lane
                elif i < len(mixed_veh_ypos) - 1:
                    # if there is a robot in front of it, platoon with them
                    if self.cars[lane_car_idcs[sorted_idcs[i + 1]]].iamrobot:
                        # Then platoon with the car in front
                        acting_car.nested = False
                        acting_car.platoon(self.cars[lane_car_idcs[sorted_idcs[i + 1]]])
            # if it is the rear-most vehicle in a lane, check that it is not the only vehicle in the lane
            elif i < len(mixed_veh_ypos) - 1:
                if self.cars[lane_car_idcs[sorted_idcs[i + 1]]].iamrobot:
                    # Then platoon with the car in front
                    acting_car.nested = False
                    acting_car.platoon(self.cars[lane_car_idcs[sorted_idcs[i + 1]]])
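# Standalone sketch of the ordering step above (assumed example values, not from the original
# code): np.argsort on the y-positions gives indices from the rear-most car (smallest y) to the
# front-most, which is the order move_humans_from_lane walks through the lane.
import numpy as np

example_ypos = [0.9, 0.1, 0.5]               # hypothetical y-positions of cars in one lane
example_sorted = np.argsort(np.array(example_ypos))
print(example_sorted)                         # [1 2 0]: car 1 is rear-most, car 0 front-most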
def world_kex(know_model=True):
    dyn = dynamics.CarDynamics2(0.1)
    world = World()
    clane = lane.StraightLane([0., -1.], [0., 1.], 0.13)
    world.lanes += [clane, clane.shifted(1)]
    #world.roads += [clane, clane.shifted(1)]
    world.fences += [clane.shifted(2), clane.shifted(-1)]

    # both behind: pos=0.027 (both behind) and pos=0.028 (different behaviours)
    # both in front: pos=0.128 (different behaviours) and pos=0.129 (both in front) to switch
    # We run:
    #   T_steps = 3
    #   step_per_u = 2
    #   speed = 0.80
    # we get 3 different results:
    #   1. both behind
    #   2. both in front
    #   3. different behaviour depending on role
    # to get the distance, take pos/0.13 to convert it to real-world meters
    left_is_follower = False
    pos = 0.15
    #pos = 0.15
    #pos = 0.0
    T_steps = 3
    speed = 0.80
    #pos = 0.128
    #pos = 0.028

    # THIS WORKS
    # steps per u is 2
    #left_is_follower = False
    #T_steps = 3
    #pos = 0.10  # WORKS
    #speed = 0.80

    # Demonstration
    left_color = "green"
    right_color = "blue-dark"
    # Real
    #follower_color = "yellow"
    #leader_color = "red"

    # The follower must always be created first, otherwise it won't move
    if left_is_follower:
        world.cars.append(car.NestedOptimizerCarFollower2(dyn, [-0.13, pos, math.pi/2., speed],
                                                          color=left_color, T=T_steps))
        world.cars.append(car.NestedOptimizerCarLeader(dyn, [-0.0, 0.0, math.pi/2., speed],
                                                       color=right_color, T=T_steps))
    else:
        world.cars.append(car.NestedOptimizerCarFollower2(dyn, [-0.0, 0.0, math.pi/2., speed],
                                                          color=right_color, T=T_steps))
        world.cars.append(car.NestedOptimizerCarLeader(dyn, [-0.13, pos, math.pi/2., speed],
                                                       color=left_color, T=T_steps))

    #world.cars.append(car.SimpleOptimizerCar(dyn, [-0.13, 2, math.pi/4., 0.], color='blue'))  # THE OBSTACLE IT WORKS WITH
    #world.cars.append(car.SimpleOptimizerCar(dyn, [-0.20, 1, math.pi/4., 0.], color='blue'))  # THE OBSTACLE FOR DEMONSTRATIONS
    world.cars.append(car.SimpleOptimizerCar(dyn, [-0.20, 0.7, math.pi/4., 0.], color='gray'))

    # default_u for the cars
    world.cars[0].default_u = np.asarray([0., 1.])
    world.cars[1].default_u = np.asarray([0., 1.])

    # Reward and default for the obstacle ---
    world.cars[2].reward = world.simple_reward(world.cars[2], speed=0.)
    world.cars[2].default_u = np.asarray([0., 0.])
    world.cars[2].movable = False

    # tell the cars who is the follower and who is the leader
    world.cars[0].leader = world.cars[1]
    world.cars[1].follower = world.cars[0]
    world.cars[0].obstacle = world.cars[2]
    world.cars[1].obstacle = world.cars[2]

    r_leader = world.simple_reward(
        [world.cars[1].traj_h, world.cars[1].traj_o, world.cars[1].traj_o],
        speed=speed)  # the leader doesn't need bounded controls, only the follower
    r_follower = world.simple_reward(
        [world.cars[1].traj, world.cars[1].traj_o, world.cars[1].traj_o],
        speed=speed) + 100. * feature.bounded_control(world.cars[0].bounds)
    r_o = 0.
    #r_o = world.simple_reward([world.cars[0].traj_o], speed=0.)

    world.cars[0].rewards = (r_leader, r_follower)
    world.cars[1].rewards = (r_follower, r_leader)
    # ------------------------------------

    return world
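# Small helper sketch for the comment in world_kex above ("take pos/0.13 to get it to irl
# meters"). This is illustrative only: 0.13 is the lane width used by StraightLane throughout
# this file, so the conversion expresses the initial offset in lane-width units, which the
# comment reads as real-world meters.
def pos_in_meters(pos, lane_width=0.13):
    return pos / lane_width

# e.g. pos_in_meters(0.15) -> about 1.15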
def world5():
    dyn = dynamics.CarDynamics(0.1)
    world = World()
    vlane = lane.StraightLane([0., -1.], [0., 1.], 0.13)
    hlane = lane.StraightLane([-1., 0.], [1., 0.], 0.13)
    world.lanes += [vlane, hlane]
    world.fences += [hlane.shifted(-1), hlane.shifted(1)]
    world.cars.append(car.UserControlledCar(dyn, [0., -.3, math.pi/2., 0.0], color='red'))
    world.cars.append(car.NestedOptimizerCar(dyn, [-0.3, 0., 0., 0.0], color='yellow'))
    world.cars[1].human = world.cars[0]
    world.cars[1].bounds = [(-3., 3.), (-2., 2.)]
    @feature.feature
    def horizontal(t, x, u):
        return -x[2]**2
    r_h = (world.simple_reward([world.cars[1].traj],
                               lanes=[vlane],
                               fences=[vlane.shifted(-1), vlane.shifted(1)]*2)
           + 100.*feature.bounded_control(world.cars[0].bounds))
    @feature.feature
    def human(t, x, u):
        return -tt.exp(10*(world.cars[1].traj_h.x[t][1]-0.13)/0.1)
    r_r = (human*10. + horizontal*2.
           + world.simple_reward(world.cars[1],
                                 lanes=[hlane]*3,
                                 fences=[hlane.shifted(-1), hlane.shifted(1)]*3
                                        + [hlane.shifted(-1.5), hlane.shifted(1.5)]*2,
                                 speed=0.9))
    world.cars[1].rewards = (r_h, r_r)
    return world
def assign_goals_phase_zero(self):
    # make sure we are in the correct phase
    assert (self.cur_phase == 0)
    # first update our knowledge of where the vehicles are
    self.find_veh_lanes()
    for c in self.rob_cars:
        if (self.veh_lanes[c] == self.rob_cars_lane_asg[c]):
            # change to a simple optimizer and remove the car from the list
            c.nested = False
            # put back to baseline reward
            c.simple_reward = c.baseline_reward

    # Now with the remaining vehicles, assign them reward functions to get closer to their goals
    # TODO: if there is a smart vehicle nearby, just go in front of that one and turn down
    # collision avoidance for smart vehicles
    def move_to_lane(desired_xloc, width=0.5, lane_width=lane.DEFAULT_WIDTH):
        @feature.feature
        def f(t, x, u):
            return tt.exp(-0.5 * ((x[0] - desired_xloc)**2) /
                          (width**2 * lane_width * lane_width / 4.))
        return f

    for c in self.rob_cars:
        # if the car is in the right place, make sure it goes back to its baseline reward
        if (self.veh_lanes[c] == self.rob_cars_lane_asg[c]):
            c.nested = False
            # put back to baseline reward
            c.simple_reward = c.baseline_reward
        else:
            if (self.rob_cars_lane_asg[c] > self.veh_lanes[c]):
                # Then we want to move the car to the right
                # TODO: does this actually change the reward or just in a shallow copy?
                #new_reward = c.simple_reward + move_right_reward
                new_reward = c.baseline_reward + 100. * move_to_lane(
                    desired_xloc=self.lane_centers[self.veh_lanes[c] + 1])
                move_direction = 1
            else:
                new_reward = c.baseline_reward + 100. * move_to_lane(
                    desired_xloc=self.lane_centers[self.veh_lanes[c] - 1])
                move_direction = -1
            # Find if there is a vehicle blocking -- just pair with the first vehicle behind it
            # in the lane over
            #TODO: change this so it can be one car-length in front and still be considered blocking
            candidate = None
            for cb in self.cars:
                if cb.iamrobot:
                    continue
                if self.veh_lanes[cb] == self.veh_lanes[c] + move_direction:
                    if (candidate is None) and (cb.x[1] <= c.x[1]):
                        candidate = cb
                    elif candidate is not None:
                        # Find which is more blocking -- closer in y-position but still behind
                        if ((cb.x[1] <= c.x[1]) and (cb.x[1] > candidate.x[1])):
                            candidate = cb
            if candidate is None:
                c.nested = False
                c.simple_reward = new_reward
            else:
                c.nested = True
                c.human = candidate
                # create a list of all the trajectories that the human is doing collision avoidance with.
                # use true (not linear) trajectory for the robot car, since it is a Stackelberg game.
                # TODO: only use the cars close to the human
                trajs_to_avoid = []
                for ca in self.cars:
                    if ca is c:
                        trajs_to_avoid.append(ca.traj)
                    elif ca is not candidate:
                        trajs_to_avoid.append(ca.linear)
                #TODO: add speed into simple reward!
                hum_reward = (self.simple_reward(trajs_to_avoid)
                              + 100. * feature.bounded_control(candidate.bounds))
                # This assumes perfect knowledge of human reward functions
                c.nested_rewards = (hum_reward, new_reward)
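# NumPy sketch of the move_to_lane feature defined above (illustrative only; the real feature
# is a Theano expression built by @feature.feature): it is an unnormalized Gaussian bump in the
# lateral position x[0], peaking at the target lane center, so adding 100.*move_to_lane(...)
# pulls the robot toward that lane. The lane_width default of 0.13 below is assumed from the
# StraightLane widths used in this file.
import numpy as np

def move_to_lane_np(x0, desired_xloc, width=0.5, lane_width=0.13):
    return np.exp(-0.5 * (x0 - desired_xloc)**2 /
                  (width**2 * lane_width * lane_width / 4.))

print(move_to_lane_np(0.13, desired_xloc=0.13))  # at the lane center: 1.0
print(move_to_lane_np(0.0, desired_xloc=0.13))   # one lane over: about exp(-8), much smaller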