def control(self, steer, gas):
    print self.index  # IMPORTANT: prints the current time step
    if self.index >= 30:  # debug: stop the run after 30 time steps
        print "Exiting"
        exit()
    if not self.movable:
        self.index += 1
        # just to test, but the simple optimizer seems to be able to find the
        # other cars and get a reward that depends on them
        #print self.traj.reward(self.reward).eval()  # IMPORTANT
        return
    if self.index < len(self.cache):
        # replay a previously cached control
        self.u = self.cache[self.index]
    else:
        if self.optimizer is None:
            # passes the self.reward function to traj.reward;
            # what actually gets passed is self._reward, which traj.reward
            # processes to produce this car's reward
            r = self.traj.reward(self.reward)
            # creates a Maximizer instance for that reward and trajectory
            self.optimizer = utils.Maximizer(r, self.traj.u)  # IMPORTANT: slow
        # maximizes the reward using the maximizer
        self.optimizer.maximize()
        # caches what happened
        self.cache.append(self.u)
    # updates the current time
    self.sync(self.cache)
    # advances one time step
    self.index += 1
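# Pattern shared by the control() variants in this file: the symbolic reward
# and the utils.Maximizer are built once (the Theano compilation is the slow
# part flagged above), maximize() is then called every step, and the resulting
# controls are cached so a later run can replay them via sync() instead of
# re-optimizing.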
def run_irl(world, car, reward, theta, data):
    def gen():
        # load each demonstration into the shared Theano variables
        for point in data:
            for c, x0, u in zip(world.cars, point['x0'], point['u']):
                c.traj.x0.set_value(x0)
                for cu, uu in zip(c.traj.u, u):
                    cu.set_value(uu)
            yield
    r = car.traj.reward(reward)
    g = utils.grad(r, car.traj.u)
    H = utils.hessian(r, car.traj.u)
    I = tt.eye(utils.shape(H)[0])
    reg = utils.vector(1)
    reg.set_value([1e-1])
    # regularize the Hessian so it stays negative definite
    H = H - reg[0] * I
    # Laplace-approximated log-likelihood of the demonstrated controls
    L = tt.dot(g, tt.dot(tn.MatrixInverse()(H), g)) + tt.log(tn.Det()(-H))
    for _ in gen():
        pass
    optimizer = utils.Maximizer(L, [theta], gen=gen, method='gd', eps=0.1,
                                debug=True, iters=1000, inf_ignore=10)
    optimizer.maximize()
    print theta.get_value()
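# Note on the objective L in run_irl above: it looks like the Laplace
# approximation of the demonstration log-likelihood from continuous inverse
# optimal control (Levine & Koltun, 2012): log P(u | theta) is approximated,
# up to constant factors, by g' H^{-1} g + log det(-H), where g and H are the
# gradient and Hessian of the reward at the demonstrated controls. Since the
# -reg[0] * I term keeps H negative definite, g' H^{-1} g <= 0 with equality
# exactly when the demonstration is a stationary point of the reward, so
# maximizing L over theta explains the demonstrations as (near-)optimal.
#
# A self-contained numpy sketch of the same objective for a hypothetical 1-D
# quadratic reward r(u) = -(u - u_star)**2 (names here are illustrative only):
import numpy as np

def laplace_loglik(u_demo, u_star, reg=1e-1):
    g = -2. * (u_demo - u_star)   # dr/du at the demonstrated control
    H = -2. - reg                 # d2r/du2, regularized to stay negative
    return g * (1. / H) * g + np.log(-H)

print laplace_loglik(1., 1.)      # demonstration optimal: highest value
print laplace_loglik(1., 0.)      # demonstration suboptimal: lower value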
def control(self, steer, gas):
    if self.index < len(self.cache):
        # replay a previously cached control
        self.u = self.cache[self.index]
    else:
        if self.optimizer is None:
            r = self.traj.reward(self.reward)
            self.optimizer = utils.Maximizer(r, self.traj.u)
        self.optimizer.maximize()
        self.cache.append(self.u)
    self.sync(self.cache)
    self.index += 1
def control(self, steer, gas):
    if self.optimizer is None:
        # center the log-probabilities for numerical stability
        u = sum(log_p for log_p in self.log_ps) / len(self.log_ps)
        self.prenormalize = th.function(
            [], None, updates=[(log_p, log_p - u) for log_p in self.log_ps])
        # subtract the log-sum-exp so the belief sums to one
        s = tt.log(sum(tt.exp(log_p) for log_p in self.log_ps))
        self.normalize = th.function(
            [], None, updates=[(log_p, log_p - s) for log_p in self.log_ps])
        # Bayes update: weight each candidate reward by how well it explains
        # the human's past trajectory
        self.update_belief = th.function(
            [], None,
            updates=[(log_p, log_p + self.human.past.log_p(reward('past')))
                     for reward, log_p in zip(self.rewards, self.log_ps)])
        self.normalize()
        self.t = 0
        if self.dumb:
            # follow a fixed control sequence instead of optimizing
            self.useq = self.objective
            self.optimizer = True
        else:
            if hasattr(self.objective, '__call__'):
                # nested optimization: the human best-responds to the robot
                obj_h = sum([traj_h.total(reward('traj'))
                             for traj_h, reward in zip(self.traj_hs, self.rewards)])
                var_h = sum([traj_h.u for traj_h in self.traj_hs], [])
                # robot objective: expectation over the belief on human rewards
                obj_r = sum(tt.exp(log_p) * self.objective(traj_h)
                            for traj_h, log_p in zip(self.traj_hs, self.log_ps))
                self.optimizer = utils.NestedMaximizer(
                    obj_h, var_h, obj_r, self.traj.u)
            else:
                self.optimizer = utils.Maximizer(self.objective, self.traj.u)
    if self.t == self.T:
        self.update_belief()
        self.t = 0
    if self.dumb:
        self.u = self.useq[0]
        self.useq = self.useq[1:]
    if self.t == 0:
        # replan at the start of each horizon (self.t is presumably
        # advanced elsewhere, once per simulation step)
        self.prenormalize()
        self.normalize()
        for traj_h in self.traj_hs:
            traj_h.x0.set_value(self.human.x)
        if not self.dumb:
            self.optimizer.maximize(bounds=self.bounds)
        for log_p in self.log_ps:
            print '%.2f' % np.exp(log_p.get_value()),
        print
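# The log_ps above hold a log-belief over candidate human rewards:
# "prenormalize" centers the log-probabilities for numerical stability,
# "normalize" subtracts the log-sum-exp so the belief sums to one, and
# "update_belief" adds each reward's log-likelihood of the human's past
# trajectory (a Bayes update in log space). A minimal numpy sketch of that
# bookkeeping (values are illustrative only):
import numpy as np

log_ps = np.array([-1., -2., -3.])              # unnormalized log-belief
log_ps = log_ps - log_ps.mean()                 # prenormalize: center
log_ps = log_ps - np.log(np.exp(log_ps).sum())  # normalize: log-sum-exp
log_ps = log_ps + np.log([.5, .3, .2])          # update: add log-likelihoods
log_ps = log_ps - np.log(np.exp(log_ps).sum())  # renormalize
print np.exp(log_ps), np.exp(log_ps).sum()      # posterior belief, sums to 1.0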
def control(self, _steer, _gas):
    if self.model is None:
        raise Exception("NeuralCar.model is None")
    if self.mu == 1.0:
        # pure imitation: use the network's prediction directly
        self.u = self.model.predict(np.array([self.x]))[0]
        return
    if self.optimizer is None:
        r = self.traj.total(self.reward)
        self.optimizer = utils.Maximizer(r, self.traj.u)
    self.optimizer.maximize()
    # blend the planned control with the network's prediction
    self.u = (1 - self.mu) * self.u + \
        self.mu * self.model.predict(np.array([self.x]))[0]
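# Note: mu blends the two controllers; mu = 0 is the pure optimizer, mu = 1 is
# the pure network, and the mu == 1.0 early return just skips the (slow)
# Theano optimization when its output would be discarded anyway.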
def control(self, steer, gas):
    print len(self.cache)  # IMPORTANT: prints the current time step
    if self.index < len(self.cache):
        # replay a previously cached control
        self.u = self.cache[self.index]
    else:
        if self.optimizer is None:
            # passes the self.reward function to traj.reward;
            # what actually gets passed is self._reward, which traj.reward
            # processes to produce this car's reward
            r = self.traj.reward(self.reward)
            # creates a Maximizer instance for that reward and trajectory
            self.optimizer = utils.Maximizer(r, self.traj.u)  # IMPORTANT: slow
        # maximizes the reward using the maximizer
        self.optimizer.maximize()
        # caches what happened
        self.cache.append(self.u)
    # updates the current time
    self.sync(self.cache)
    # advances one time step
    self.index += 1
def control(self, steer, gas):
    if self.nested:
        if self.nested_optimizer is None:
            reward_h, reward_r = self._nested_rewards
            reward_h = self.traj_h.reward(reward_h)
            reward_r = self.traj.reward(reward_r)
            self.nested_optimizer = utils.NestedMaximizer(
                reward_h, self.traj_h.u, reward_r, self.traj.u)
        self.traj_h.x0.set_value(self.human.x)
        self.nested_optimizer.maximize(bounds=self.bounds)
    else:
        print len(self.cache)
        if self.index < len(self.cache):
            self.u = self.cache[self.index]
        else:
            if self.simple_optimizer is None:
                r = self.traj.reward(self._simple_reward)
                self.simple_optimizer = utils.Maximizer(r, self.traj.u)
            # TODO: make sure these bounds are correct, and that we shouldn't
            # add bounded control to the reward function
            self.simple_optimizer.maximize(bounds=self.bounds)
            self.cache.append(self.u)
        self.index += 1
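# NestedMaximizer solves a bilevel problem: the human's controls traj_h.u are
# chosen to maximize reward_h, and the robot's controls traj.u maximize
# reward_r subject to that best response. A minimal illustrative sketch of the
# same structure with hypothetical 1-D rewards (scipy is used only for this
# sketch, not by the real code):
from scipy.optimize import minimize_scalar

def best_human_response(u_r):
    # human maximizes reward_h(u_h; u_r) = -(u_h - u_r)**2, i.e. matches the robot
    return minimize_scalar(lambda u_h: (u_h - u_r) ** 2).x

def robot_cost(u_r):
    u_h = best_human_response(u_r)
    # robot maximizes reward_r = -(u_r - 1)**2 - u_h**2 (negated: minimized)
    return (u_r - 1.) ** 2 + u_h ** 2

print minimize_scalar(robot_cost).x  # -> 0.5 for these rewards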
def control(self, _steer, _gas):
    if self.copyx is not None:
        print(self.copyx.shape[0], " examples")
        # retrieve the stored example whose state is nearest to the current one
        dists = np.array([np.linalg.norm(self.x - x) for x in self.copyx])
        i = np.argmin(dists)
        self.social_u.set_value(self.copyu[i])
        print("SOCIAL U_i", i)
        if dists[i] > 1.0:
            # nearest example is too far away: drop the social term
            print("ignoring social")
            self.l.set_value(0)
        else:
            self.l.set_value(self.l_default)
    else:
        self.l.set_value(0)
    if self.optimizer is None:
        # reward minus a penalty for deviating from the retrieved control
        r = self.traj.total(self.reward) \
            - self.l * (self.traj.u[0] - self.social_u).norm(2)
        self.optimizer = utils.Maximizer(r, self.traj.u)
    self.optimizer.maximize()
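# The social term above is a soft imitation constraint: the control observed
# in the stored example nearest to the current state is retrieved, and the
# first planned control is pulled toward it with weight l; l is zeroed when
# the nearest example is farther than 1.0, so the penalty only acts when the
# current state is close to the demonstration data.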
def optimizer(self):
    if self._optimizer is None:
        # build the maximizer lazily on first access
        self._optimizer = utils.Maximizer(self._objective, self._variables)
    return self._optimizer
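# Presumably exposed as a lazy accessor (e.g. behind @property in the
# surrounding class, which is not shown here), so the expensive Theano
# compilation inside utils.Maximizer only happens the first time the
# optimizer is actually needed.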
def control(self, steer, gas):
    if self.optimizer is None:
        r = self.traj.total(self.reward)
        self.optimizer = utils.Maximizer(r, self.traj.u)
    self.optimizer.maximize()
            t = idx_u * self.step_per_u + idx
            r_list.append(reward(t, self.x[t], self.u[idx_u]))
        #r = [reward(t, self.x[t], self.u[t]) for t in range(self.T)]
        return sum(r_list)
        """
        g = [utils.grad(r[t], self.x[t]) for t in range(self.T)]
        for t in reversed(range(self.T-1)):
            g[t] = g[t] + tt.dot(g[t+1], utils.jacobian(self.x[t+1], self.x[t]))
        for t in range(self.T):
            g[t] = tt.dot(g[t], utils.jacobian(self.x[t], self.u[t])) \
                + utils.grad(r[t], self.u[t], constants=[self.x[t]])
        return sum(r), {self.u[t]: g[t] for t in range(self.T)}
        """

if __name__ == '__main__':
    from dynamics import CarDynamics
    import math
    dyn = CarDynamics(0.1)
    traj = Trajectory(5, dyn)
    l = lane.StraightLane([0., -1.], [0., 1.], .1)
    reward = feature.speed() + l.feature()
    r = traj.reward(reward)
    #traj.x0.value = np.asarray([0., 0., math.pi/2, 1.])
    traj.x0.set_value([0.1, 0., math.pi/2, 1.])
    optimizer = utils.Maximizer(r, traj.u)
    import time
    t = time.time()
    for i in range(1):
        optimizer.maximize(bounds=[(-1., 1.), (-2., 2.)])
    print (time.time() - t) / 1.
    print [u.get_value() for u in traj.u]
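# Note on the triple-quoted block disabled above: it computes the exact
# gradient of the summed reward with respect to the controls by a backward
# recursion through the dynamics (the discrete-time adjoint method / backprop
# through time): the state gradients g[t] are accumulated backward through the
# Jacobians of x[t+1] w.r.t. x[t], then pushed onto the controls through the
# Jacobians of x[t] w.r.t. u[t], instead of letting Theano differentiate the
# unrolled trajectory directly.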