def __init__(self, datafile=dataRootPath + '/grid.npy'):
    """Load a stored grid of trajectories and keep the usable ones.

    :param datafile: path handed to ``GridPolicy.load``.

    After loading, the last control row of every trajectory is overwritten
    with the previous one, and ``self.data`` keeps only the records whose
    state trajectory is non-empty and whose ``x0[2, 0]`` is below 0.7 in
    absolute value.
    """
    self.grid = GridPolicy()
    self.grid.load(datafile)

    for d in self.grid.data:
        # Correct error at the end of U traj: copy the second-to-last control
        # over the last one. Records with fewer than two control rows have
        # nothing to copy from (indexing [-2] would raise), so leave them
        # untouched; empty ones are dropped by the filter below anyway.
        if len(d.U) >= 2:
            d.U[-1, :] = d.U[-2, :]

    # For quad copter: remove terminal traj
    self.data = [
        d for d in self.grid.data
        if len(d.X) > 0 and abs(d.x0[2, 0]) < .7
    ]
def plotGrid(theta):
    """Draw the value map and a bundle of predicted trajectories.

    Both plots are sampled on the box [-1, 1] x [-1, 1] for the first two
    state coordinates; the third coordinate ranges over
    ``[theta, theta + 1e-3]`` and the last three over ``[0, 1e-3]``.

    :param theta: lower bound of the third state coordinate.

    Relies on the module-level ``sess``, ``value``, ``trajx``, ``np`` and
    ``plt``. Nothing is returned; the current matplotlib axes are drawn on.
    """
    from grid_policy import GridPolicy

    sampler = GridPolicy()

    def slice_points(resolution):
        # Build fresh bound lists on every call, as two separate literals would.
        lower = [-1., -1., theta, 0, 0, 0]
        upper = [+1, +1, theta + 1e-3, 1e-3, 1e-3, 1e-3]
        return sampler.setGrid(lower, upper, resolution)

    # Fine sampling: colour each point with the output of the value network.
    # The network input is the sampled points followed by an equally shaped
    # block of zeros.
    dense = slice_points(1e-2)
    values = sess.run(
        value.policy,
        feed_dict={value.x: np.hstack([dense, 0 * dense])},
    )
    plt.scatter(
        dense[:, 0].flat,
        dense[:, 1].flat,
        c=values.flat,
        linewidths=0,
        vmin=0,
        vmax=2,
    )

    # Coarse sampling: one curve per point, from the trajectory network.
    coarse = slice_points(8e-2)
    rollouts = sess.run(
        trajx.policy,
        feed_dict={trajx.x: np.hstack([coarse, 0 * coarse])},
    )
    for flat_traj in rollouts:
        # Each output row is reshaped to 20 x 6; only the first two columns
        # are plotted.
        traj = np.reshape(flat_traj, [20, 6])
        plt.plot(traj[:, 0], traj[:, 1])
def plotGrid(nets, theta=0, idxs=None, x0=None, step=1e-2):
    """Draw the value map and predicted trajectories on a slice of the state space.

    :param nets: object exposing ``sess``, ``value`` and ``ptrajx``.
    :param theta: unused; kept so existing callers keep working.
    :param idxs: mapping ``state index -> [low, high]`` giving the range of
        each swept coordinate. Defaults to ``{0: [-1, 1], 1: [-1, 1]}``.
    :param x0: reference state for the coordinates that are not swept;
        defaults to ``zero(env.nx)``.
    :param step: the trajectory grid is sampled at ``8 * step`` and the
        non-swept coordinates get a range of width ``step / 10``.

    Relies on the module-level ``env``, ``zero``, ``np`` and ``plt``. Nothing
    is returned; the current matplotlib axes are drawn on.
    """
    from grid_policy import GridPolicy

    if idxs is None:
        # Built per call: a dict literal in the signature would be one shared
        # mutable object across all invocations.
        idxs = {0: [-1, 1], 1: [-1, 1]}

    grid = GridPolicy()
    x0 = x0 if x0 is not None else zero(env.nx)

    # Bounds: a thin range around x0 everywhere, widened on the swept indices.
    xm = x0.copy()
    xM = x0.copy() + step / 10
    for i, (vm, vM) in idxs.items():
        xm[i] = vm
        xM[i] = vM

    # Value map.
    # NOTE(review): the resolution here is fixed at 1e-2 and does not follow
    # `step` (it only matches the default) -- confirm this is intended.
    X0 = grid.setGrid(xm, xM, 1e-2)
    V = nets.sess.run(nets.value.policy,
                      feed_dict={nets.value.x: np.hstack([X0, 0 * X0])})
    # Columns 0 and 1 are plotted whatever keys `idxs` contains.
    plt.scatter(X0[:, 0].flat, X0[:, 1].flat, c=V.flat, linewidths=0)

    # Predicted trajectories, on a coarser grid.
    X0 = grid.setGrid(xm, xM, step * 8)
    for x in nets.sess.run(nets.ptrajx.policy,
                           feed_dict={nets.ptrajx.x: np.hstack([X0, 0 * X0])}):
        # Each output row is reshaped to 20 x env.nx.
        X = np.reshape(x, [20, env.nx])
        plt.plot(X[:, 0], X[:, 1])
class Dataset:
    """Training arrays assembled from the trajectories stored in a GridPolicy file."""

    def __init__(self, datafile=dataRootPath + '/grid.npy'):
        """Load the grid from ``datafile`` and keep the usable trajectories.

        The last control row of every trajectory is overwritten with the
        previous one, and ``self.data`` keeps only the records whose state
        trajectory is non-empty and whose ``x0[2, 0]`` is below 0.7 in
        absolute value.
        """
        self.grid = GridPolicy()
        self.grid.load(datafile)

        for d in self.grid.data:
            # Correct error at the end of U traj: copy the second-to-last
            # control over the last one. Records with fewer than two control
            # rows have nothing to copy from (indexing [-2] would raise), so
            # leave them untouched; empty ones are dropped by the filter below.
            if len(d.U) >= 2:
                d.U[-1, :] = d.U[-2, :]

        # For quad copter: remove terminal traj
        self.data = [
            d for d in self.grid.data
            if len(d.X) > 0 and abs(d.x0[2, 0]) < .7
        ]

    def set(self):
        """Stack the per-trajectory data into arrays stored on the instance.

        Sets ``xs``, ``us``, ``nexs``, ``vs``, ``trajxs`` and ``trajus``, then
        returns ``self`` so the call can be chained after the constructor.
        """
        data = self.data
        SHIFT = 4

        def nex(X):
            # Shift X forward by SHIFT rows, repeating the last row SHIFT times
            # so the result has as many rows as X.
            return np.vstack([X[SHIFT:, :]] + [X[-1:, :]] * SHIFT)

        def subtraj(X, i):
            # Tail of X starting at row i, padded with i copies of the last row
            # and flattened to one vector.
            return np.ravel(np.vstack([X[i:, :]] + [X[-1:, :]] * i))

        # Every state and control row of every trajectory, stacked.
        xs = [d.X for d in data]
        self.xs = np.array(np.vstack(xs))

        us = [d.U for d in data]
        self.us = np.array(np.vstack(us))

        # For each state row, the state SHIFT rows later in the same trajectory.
        nexs = [nex(d.X) for d in data]
        self.nexs = np.array(np.vstack(nexs))

        # `cost - T` per trajectory, concatenated into a single column.
        vs = [d.cost - d.T for d in data]
        self.vs = np.expand_dims(np.array(np.concatenate(vs)), 1)

        # One flattened, padded sub-trajectory per starting row.
        trajxs = [subtraj(d.X, i) for d in data for i in range(len(d.X))]
        self.trajxs = np.array(np.vstack(trajxs))

        trajus = [subtraj(d.U, i) for d in data for i in range(len(d.U))]
        self.trajus = np.array(np.vstack(trajus))

        return self
for ik,k in enumerate(ks): costs[ig,ik] = graph.edgeTime[k] prmcosts = np.load(dataRootPath+'/prmmeancost.npy') plt.plot(np.arange(len(graphs))*140,np.mean(costs,1)) plt.plot(np.arange(10)*300,np.mean(prmcosts,1)) ''' # --- # --- # --- nets.load('up%02d' % NG) from grid_policy import GridPolicy grid = GridPolicy() '''grid.setGrid([-1.,-1.,0,0,0,0],[+1,+1,1e-3,1e-3,1e-3,1e-3],25e-2) x1 = zero(6) Tmax = 0. trajs = [] for x in grid.grid: x0 = np.matrix(x).T Xp,Up,Tp = trajFromTraj(x0,x1) Xa,Ua,Ta = optNet(x0,x1) trajs.append( [[x0,x1],[Xp,Up,Tp],[Xa,Ua,Ta]] ) Tm = min([ t[2][2] for t in trajs]+[ t[1][2] for t in trajs]) TM = max([ t[2][2] for t in trajs]+[ t[1][2] for t in trajs]) # for [x0,x1],[Xp,Up,Tp],[Xa,Ua,Ta] in trajs: # plt.figure(1)
# Persist the PRM graph, then check it: checkPRM must return 0 or the run stops.
prm.graph.save(dataRootPath)
assert(checkPRM(prm.graph,True)==0)
print 'Done with the PRM. ',time.ctime()

# --- GRID ---
# --- GRID ---
# --- GRID ---

# Time-derived seed in [0, 9999]. It is printed so a run can be reproduced by
# hard-coding the value, and applied to both the numpy and the stdlib generator.
RANDOM_SEED = int((time.time()%10)*1000)
print "Seed = %d" % RANDOM_SEED
np .random.seed (RANDOM_SEED)
random.seed (RANDOM_SEED)

#dataRootPath = dataRootPath + '/2dgrid'

# Grid bounds: env.qlow / env.qup followed by three extra coordinates ranging
# over [0, EPS], sampled with a step of .1.
# NOTE(review): the extra coordinates are presumably velocities pinned to
# (almost) zero -- confirm against env.
grid = GridPolicy(prm)
EPS = 1e-3
grid.setGrid( np.concatenate([ env.qlow, zero(3) ]),
              np.concatenate([ env.qup , zero(3)+EPS ]), .1 )
#grid.setGrid( np.matrix([ -1., -1., 0, 0, 0, 0 ]).T,
#              np.matrix([ 1., 1., 0, 0, 0, 0 ]).T+EPS, .1 )

# Configure the acado solver with the 'policy' settings before any sampling.
config(acado,'policy')
acado.setup_async(32,200)

# NOTE(review): the two blocks below are laid out as independent top-level
# conditions; the source layout was collapsed, so confirm the nesting.
if LOAD_GRID:
    grid.load(dataRootPath+'/grid.npy')

if SAMPLE_GRID:
    print 'Sample the grid',time.ctime()
    grid.sample(subsample=1,verbose=True)
# Optional PRM extension: connect nodes to zero, then densify and connexify the graph.
if EXTEND_PRM>0:
    print 'Connect all points to zero (at least tries)',time.ctime()
    connectToZero(graph)
    print 'Densify PRM',time.ctime()
    densifyPrm(graph)
    connexifyPrm(graph)

# NOTE(review): the source layout was collapsed, so it cannot be told whether
# this save belongs inside the `if` above; it is placed at top level here --
# confirm.
prm.graph.save(dataRootPath)

# --- GRID ---
# --- GRID ---
# --- GRID ---

# Build an OptimalPRM from the PRM, reusing its acado connector, and set up
# the grid on top of it.
oprm = OptimalPRM.makeFromPRM(prm,acado=prm.connect.acado,stateDiff=PendulumStateDiff(2))
grid = GridPolicy(oprm)
EPS = 1e-3
# Bounds: the first two coordinates range over about [-pi, pi] (the second is
# shrunk by EPS on each side), the last two over [0, EPS]; the step is 1.
grid.setGrid([ -np.pi,-np.pi+EPS,0,0],[np.pi,np.pi-EPS,EPS,EPS],1.)

# Either sample the grid afresh or reload the stored one.
if SAMPLE_GRID:
    print 'Sample the grid',time.ctime()
    grid.sample()
else:
    grid.load(dataRootPath+'/grid.npy')

# Two refinement passes with an increasing neighbour count; both queue the
# entries whose cost exceeds 100.
# NOTE(review): `data` is not defined in this excerpt -- confirm where it is bound.
if REFINE_GRID>3:
    print 'Fill the grid',time.ctime()
    refineGrid(data,NNEIGHBOR=30,PERCENTAGE=.9,
               RANDQUEUE=[ i for i,d in enumerate(data) if d.cost>100])
    refineGrid(data,NNEIGHBOR=100,PERCENTAGE=.9,
               RANDQUEUE=[ i for i,d in enumerate(data) if d.cost>100])