def get_control_reward(self, fw):
    """Return the weighted sum of the two control features.

    Args:
        fw: Forwarded unchanged as the first argument of
            ``feature.bounded_control``.

    Returns:
        ``self.w_control * feature.control()`` plus
        ``self.w_bounded_control * feature.bounded_control(fw,
        self.car_control_bounds)``.
    """
    return (
        self.w_control * feature.control()
        + self.w_bounded_control
        * feature.bounded_control(fw, self.car_control_bounds)
    )
def simple_reward(self, trajs=None, lanes=None, roads=None, fences=None, speed=1., speed_import=1.):
    """Assemble the simple, hand-weighted reward expression.

    Args:
        trajs: ``None`` selects the ``linear`` trajectory of every car in
            ``self.cars``; a ``car.Car`` instance selects the ``linear``
            trajectory of every car that compares unequal to it; any other
            value is iterated as-is.
        lanes: Iterable of lanes; defaults to ``self.lanes``.
        roads: Iterable of roads; defaults to ``self.roads``.
        fences: Iterable of fences; defaults to ``self.fences``.
        speed: Argument for ``feature.speed``; ``None`` omits the speed term.
        speed_import: Extra multiplier applied to the speed term.

    Returns:
        The control term plus the weighted lane, fence, road, speed and
        trajectory terms.
    """
    lanes = self.lanes if lanes is None else lanes
    roads = self.roads if roads is None else roads
    fences = self.fences if fences is None else fences

    if trajs is None:
        trajs = [other.linear for other in self.cars]
    elif isinstance(trajs, car.Car):
        ego = trajs
        trajs = [other.linear for other in self.cars if other != ego]

    # Weights of the simple model.
    # An alternative set seen with this code:
    # [.959, -46.271, 9.015, 8.531, -57.604]
    w_lane, w_fence, w_road, w_speed, w_traj = 1., -50., 10., 10., -60.

    reward = 0.1 * feature.control()
    for lane in lanes:
        reward = reward + w_lane * lane.gaussian()
    for fence in fences:
        reward = reward + w_fence * fence.gaussian()
    for road in roads:
        reward = reward + w_road * road.gaussian(10.)
    if speed is not None:
        reward = reward + speed_import * w_speed * feature.speed(speed)
    for traj in trajs:
        reward = reward + w_traj * traj.gaussian()
    return reward
def simple_reward(self, trajs=None, lanes=None, roads=None, fences=None, speed=1., speed_import=1.):
    """Build the simple reward as a weighted sum of feature terms.

    Any of ``lanes``, ``roads`` and ``fences`` left as ``None`` falls back
    to the attribute of the same name on ``self``.

    Args:
        trajs: ``None`` means the ``linear`` trajectory of all of
            ``self.cars``; a ``car.Car`` means the ``linear`` trajectory of
            every car not equal to it; otherwise used directly as the
            iterable of trajectories.
        lanes: Lanes contributing ``gaussian()`` terms.
        roads: Roads contributing ``gaussian(10.)`` terms.
        fences: Fences contributing ``gaussian()`` terms.
        speed: Passed to ``feature.speed``; the term is skipped when ``None``.
        speed_import: Scale factor for the speed term.

    Returns:
        The accumulated reward expression.
    """
    if lanes is None:
        lanes = self.lanes
    if roads is None:
        roads = self.roads
    if fences is None:
        fences = self.fences

    if trajs is None:
        trajs = [vehicle.linear for vehicle in self.cars]
    elif isinstance(trajs, car.Car):
        excluded = trajs
        trajs = [vehicle.linear for vehicle in self.cars if vehicle != excluded]

    # Simple model weights, in order: lane, fence, road, speed, trajectory.
    # Alternative values seen with this code:
    # [.959, -46.271, 9.015, 8.531, -57.604]
    lane_w, fence_w, road_w, speed_w, traj_w = 1., -50., 10., 10., -60.

    total = 0.1 * feature.control()
    for lane in lanes:
        total = total + lane_w * lane.gaussian()
    for fence in fences:
        total = total + fence_w * fence.gaussian()
    for road in roads:
        total = total + road_w * road.gaussian(10.)
    if speed is not None:
        total = total + speed_import * speed_w * feature.speed(speed)
    for traj in trajs:
        total = total + traj_w * traj.gaussian()
    return total
def simple_reward(self, trajs=None, lanes=None, roads=None, fences=None, speed=1., speed_import=1.):
    """Resolve default arguments and start the simple reward expression.

    ``lanes``, ``roads`` and ``fences`` fall back to the attributes of the
    same name on ``self`` when left as ``None``. ``trajs`` becomes the
    ``linear`` trajectory of every car in ``self.cars`` when ``None``, or of
    every car unequal to it when a ``car.Car`` instance is passed.

    NOTE(review): the visible body stops after the control term is assigned
    to ``r``; nothing is returned and ``speed`` / ``speed_import`` are
    unused here. This looks like a truncated copy -- confirm against the
    full definition.
    """
    # Fill in defaults from the instance.
    if lanes is None:
        lanes = self.lanes
    if roads is None:
        roads = self.roads
    if fences is None:
        fences = self.fences
    if trajs is None:
        trajs = [c.linear for c in self.cars]
    elif isinstance(trajs, car.Car):
        # A car was passed: use every other car's linear trajectory.
        trajs = [c.linear for c in self.cars if c!=trajs]
    # Control term, scaled by 0.1.
    r = 0.1*feature.control()
def simple_reward(self, trajs=None, lanes=None, roads=None, fences=None, speed=1., speed_import=1.):
    """Create the simple reward for one car.

    Args:
        trajs: ``None`` uses the ``linear`` trajectory of every car in
            ``self.cars``. A ``car.Car`` or ``static_obj.Car`` instance uses
            the ``linear`` trajectory of every car unequal to it. Otherwise
            either an iterable of trajectories or a single (non-iterable)
            trajectory object.
        lanes: Iterable of lanes; defaults to ``self.lanes``.
        roads: Iterable of roads; defaults to ``self.roads``. If that is
            also ``None`` the road term is skipped.
        fences: Iterable of fences; defaults to ``self.fences``.
        speed: Argument for ``feature.speed``; ``None`` omits the speed term.
        speed_import: Extra multiplier applied to the speed term.

    Returns:
        The control term plus the weighted lane, fence, road, speed and
        trajectory terms.
    """
    if lanes is None:
        lanes = self.lanes
    if roads is None:
        roads = self.roads
    if fences is None:
        fences = self.fences

    if trajs is None:
        trajs = [c.linear for c in self.cars]
    elif isinstance(trajs, (car.Car, static_obj.Car)):
        trajs = [c.linear for c in self.cars if c != trajs]
    else:
        # With a single car the caller may hand over one trajectory instead
        # of a list. Detect that up front rather than wrapping the whole
        # accumulation loop in a catch-all ``except``, which would also hide
        # genuine errors raised inside ``gaussian()``.
        try:
            iter(trajs)
        except TypeError:
            trajs = [trajs]

    r = 0.1 * feature.control()
    theta = [1., -50., 10., 10., -60.]  # Simple model
    # theta = [.959, -46.271, 9.015, 8.531, -57.604]

    # Add the lane, fence, road, speed and trajectory terms.
    for lane in lanes:
        r = r + theta[0] * lane.gaussian()
    # A much larger fence penalty (an extra factor of 1000000) was tried to
    # keep cars from leaving the road; the plain weight is what is used.
    for fence in fences:
        r = r + theta[1] * fence.gaussian()
    if roads is not None:
        for road in roads:
            r = r + theta[2] * road.gaussian(10.)
    if speed is not None:
        r = r + speed_import * theta[3] * feature.speed(speed)
    for traj in trajs:
        r = r + theta[4] * traj.gaussian()
    return r
def simple_reward(self, trajs=None, lanes=None, roads=None, fences=None, speed=1., speed_import=1.):
    """Compose the simple reward from weighted feature terms.

    Args:
        trajs: ``None`` selects the ``linear`` trajectory of each car in
            ``self.cars``; a ``car.Car`` selects the ``linear`` trajectory
            of each car unequal to it; anything else is iterated directly.
        lanes: Iterable of lanes; ``None`` means ``self.lanes``.
        roads: Iterable of roads; ``None`` means ``self.roads``.
        fences: Iterable of fences; ``None`` means ``self.fences``.
        speed: Argument for ``feature.speed``; ``None`` drops the speed term.
        speed_import: Additional multiplier on the speed term.

    Returns:
        The summed reward expression.
    """
    lanes = self.lanes if lanes is None else lanes
    roads = self.roads if roads is None else roads
    fences = self.fences if fences is None else fences

    if trajs is None:
        trajs = [agent.linear for agent in self.cars]
    elif isinstance(trajs, car.Car):
        this_car = trajs
        trajs = [agent.linear for agent in self.cars if agent != this_car]

    reward = 0.1 * feature.control()

    # What each weight multiplies, as used below:
    #   weights[0] -> lane.gaussian() for every lane
    #   weights[1] -> fence.gaussian() for every fence
    #   weights[2] -> road.gaussian(10.) for every road
    #   weights[3] -> feature.speed(speed), further scaled by speed_import
    #   weights[4] -> traj.gaussian() for every trajectory in trajs
    weights = (1., -50., 10., 10., -60.)  # Simple model
    # Alternative values seen with this code:
    # [.959, -46.271, 9.015, 8.531, -57.604]

    for lane in lanes:
        reward = reward + weights[0] * lane.gaussian()
    for fence in fences:
        reward = reward + weights[1] * fence.gaussian()
    for road in roads:
        reward = reward + weights[2] * road.gaussian(10.)
    if speed is not None:
        reward = reward + speed_import * weights[3] * feature.speed(speed)
    for traj in trajs:
        reward = reward + weights[4] * traj.gaussian()
    return reward
# Demo scene: three parallel straight lanes, a user-controlled car and a red
# optimizer car whose reward is also drawn as the heat map.
vis = Visualizer(dyn.dt)
vis.lanes.append(lane.StraightLane([0., -1.], [0., 1.], 0.13))
vis.lanes.append(vis.lanes[0].shifted(1))
vis.lanes.append(vis.lanes[0].shifted(-1))
vis.cars.append(car.UserControlledCar(dyn, [0., 0., math.pi / 2., .1]))
vis.cars.append(
    car.SimpleOptimizerCar(dyn, [0., 0.5, math.pi / 2., 0.], color='red'))

# Reward of the optimizer car, accumulated left to right.
r = (
    -60. * vis.cars[0].linear.gaussian()
    + vis.lanes[0].gaussian()
    + vis.lanes[1].gaussian()
    + vis.lanes[2].gaussian()
    - 30. * vis.lanes[1].shifted(1).gaussian()
    - 30. * vis.lanes[2].shifted(-1).gaussian()
    + 30. * feature.speed(0.5)
    + 10. * vis.lanes[0].gaussian(10.)
    + .1 * feature.control()
)
vis.cars[1].reward = r
vis.main_car = vis.cars[0]
vis.paused = True
vis.set_heat(r)
# Alternative heat maps that were tried:
# vis.set_heat(vis.lanes[0].gaussian()+vis.lanes[1].gaussian()+vis.lanes[2].gaussian())
# vis.set_heat(-vis.cars[1].traj.gaussian()+vis.lanes[0].gaussian()+vis.lanes[1].gaussian()+vis.lanes[2].gaussian())
vis.run()

# Replay set-up: taken only when run as a script with no extra arguments.
if __name__ == '__main__' and len(sys.argv) == 1:
    import world as wrld
    import car
    world = wrld.world2()
    vis = Visualizer(0.1, name='replay')
    vis.use_world(world)
    vis.main_car = world.cars[0]
# Pick the user-controlled car (the last one, if several are present).
the_car = None
for c in the_world.cars:
    if isinstance(c, car.UserControlledCar):
        the_car = c
if the_car is None:
    # Fail with a clear message instead of an AttributeError on None below.
    raise ValueError('the world contains no UserControlledCar')

# Cut every recording into windows of T steps.
T = the_car.traj.T
train = []
for fname in files:
    # Pickle data is binary, so open in 'rb' (text mode breaks on Python 3).
    # NOTE: pickle.load can execute arbitrary code -- only load trusted files.
    with open(fname, 'rb') as f:
        us, xs = pickle.load(f)
    for t in range(T, len(xs[0]) - T, T):
        train.append({
            'x0': [xseq[t - 1] for xseq in xs],
            'u': [useq[t:t + T] for useq in us],
        })

# Initial weights for the reward terms.
theta = utils.vector(5)
theta.set_value(np.array([1., -50., 10., 10., -60.]))

# Reward = control term + theta-weighted feature terms.
r = 0.1 * feature.control()
for lane in the_world.lanes:
    r = r + theta[0] * lane.gaussian()
for fence in the_world.fences:
    # Use the fence itself; this loop previously reused the stale ``lane``
    # variable left over from the loop above.
    r = r + theta[1] * fence.gaussian()
for road in the_world.roads:
    r = r + theta[2] * road.gaussian(10.)
r = r + theta[3] * feature.speed(1.)
# ``other_car`` rather than ``car`` so the ``car`` module used above is not
# shadowed by the loop variable.
for other_car in the_world.cars:
    if other_car != the_car:
        r = r + theta[4] * other_car.traj.gaussian()

run_irl(the_world, the_car, r, theta, train)
# Demo scene: three parallel straight lanes, a user-controlled car and a red
# optimizer car whose reward is also drawn as the heat map.
dyn = dynamics.CarDynamics(0.1)
vis = Visualizer(dyn.dt)
vis.lanes.append(lane.StraightLane([0., -1.], [0., 1.], 0.13))
vis.lanes.append(vis.lanes[0].shifted(1))
vis.lanes.append(vis.lanes[0].shifted(-1))
vis.cars.append(car.UserControlledCar(dyn, [0., 0., math.pi / 2., .1]))
vis.cars.append(
    car.SimpleOptimizerCar(dyn, [0., 0.5, math.pi / 2., 0.], color='red'))

# Reward of the optimizer car, accumulated left to right.
r = (
    -60. * vis.cars[0].linear.gaussian()
    + vis.lanes[0].gaussian()
    + vis.lanes[1].gaussian()
    + vis.lanes[2].gaussian()
    - 30. * vis.lanes[1].shifted(1).gaussian()
    - 30. * vis.lanes[2].shifted(-1).gaussian()
    + 30. * feature.speed(0.5)
    + 10. * vis.lanes[0].gaussian(10.)
    + .1 * feature.control()
)
vis.cars[1].reward = r
vis.main_car = vis.cars[0]
vis.paused = True
vis.set_heat(r)
# Alternative heat maps that were tried:
# vis.set_heat(vis.lanes[0].gaussian()+vis.lanes[1].gaussian()+vis.lanes[2].gaussian())
# vis.set_heat(-vis.cars[1].traj.gaussian()+vis.lanes[0].gaussian()+vis.lanes[1].gaussian()+vis.lanes[2].gaussian())
vis.run()

# Replay set-up: taken only when run as a script with no extra arguments.
if __name__ == '__main__' and len(sys.argv) == 1:
    import world as wrld
    import car
    world = wrld.world2()
    vis = Visualizer(0.1, name='replay')
    vis.use_world(world)
    vis.main_car = world.cars[0]
else: the_car = None for c in the_world.cars: if isinstance(c, car.UserControlledCar): the_car = c T = the_car.traj.T train = [] for fname in files: with open(fname) as f: us, xs = pickle.load(f) for t in range(T, len(xs[0])-T, T): point = { 'x0': [xseq[t-1] for xseq in xs], 'u': [useq[t:t+T] for useq in us] } train.append(point) theta = utils.vector(5) theta.set_value(np.array([1., -50., 10., 10., -60.])) r = 0.1*feature.control() for lane in the_world.lanes: r = r + theta[0]*lane.gaussian() for fence in the_world.fences: r = r + theta[1]*lane.gaussian() for road in the_world.roads: r = r + theta[2]*road.gaussian(10.) r = r + theta[3]*feature.speed(1.) for car in the_world.cars: if car!=the_car: r = r + theta[4]*car.traj.gaussian() run_irl(the_world, the_car, r, theta, train)