def plotGrid(theta):
    """Scatter the `value.policy` output and overlay `trajx.policy` rollouts.

    A grid is built over [-1, 1] on the first two coordinates; the third
    coordinate is pinned to `theta` and the last three to 0 (each through a
    1e-3-wide interval).  A fine grid (step 1e-2) is evaluated with
    `value.policy` and drawn as a scatter plot coloured on a fixed [0, 2]
    scale; a coarser grid (step 8e-2) is fed to `trajx.policy` and every
    returned row is drawn as a line in the plane of the first two coordinates.

    Relies on the names `sess`, `value`, `trajx`, `np` and `plt` being
    available in the enclosing module.

    :param theta: value at which the third grid coordinate is pinned.
    """
    from grid_policy import GridPolicy

    sampler = GridPolicy()

    def slice_grid(resolution):
        # Bounds are rebuilt on each call so that every setGrid invocation
        # receives its own fresh list objects.
        lower = [-1., -1., theta, 0, 0, 0]
        upper = [+1, +1, theta + 1e-3, 1e-3, 1e-3, 1e-3]
        return sampler.setGrid(lower, upper, resolution)

    def network_input(points):
        # The networks are fed each grid point followed by an all-zero block
        # of the same width (presumably the target state -- TODO confirm).
        return np.hstack([points, 0 * points])

    # Dense sampling: one coloured dot per grid point.
    fine = slice_grid(1e-2)
    values = sess.run(value.policy, feed_dict={value.x: network_input(fine)})
    plt.scatter(fine[:, 0].flat, fine[:, 1].flat,
                c=values.flat, linewidths=0, vmin=0, vmax=2)

    # Sparse sampling: one curve per grid point.
    coarse = slice_grid(8e-2)
    rollouts = sess.run(trajx.policy, feed_dict={trajx.x: network_input(coarse)})
    for flat_rollout in rollouts:
        # Each row is unpacked as 20 consecutive 6-dimensional samples.
        path = np.reshape(flat_rollout, (20, 6))
        plt.plot(path[:, 0], path[:, 1])
def plotGrid(nets, theta=0, idxs=None, x0=None, step=1e-2):
    """Scatter `nets.value.policy` and overlay `nets.ptrajx.policy` rollouts.

    A grid is built around `x0`: the coordinates listed in `idxs` sweep the
    given [min, max] range, every other coordinate is confined to the thin
    interval [x0[i], x0[i] + step / 10].  The grid sampled at resolution
    `step` is evaluated with `nets.value.policy` and drawn as a scatter plot;
    a grid sampled at `8 * step` is fed to `nets.ptrajx.policy` and each
    returned row is drawn as a line.

    Relies on the names `env`, `zero`, `np` and `plt` being available in the
    enclosing module.

    :param nets: object exposing `sess`, `value` and `ptrajx`.
    :param theta: unused; kept so that existing callers keep working.
    :param idxs: mapping ``coordinate index -> [min, max]`` of the swept
        coordinates.  Defaults to ``{0: [-1, 1], 1: [-1, 1]}``.
    :param x0: reference point supplying the value of the coordinates that
        are not swept.  Defaults to ``zero(env.nx)``.
    :param step: resolution of the scatter grid; the rollout grid uses
        ``8 * step``.
    """
    from grid_policy import GridPolicy

    # Build the default mapping per call instead of sharing one dict object
    # across all calls through the signature.
    if idxs is None:
        idxs = {0: [-1, 1], 1: [-1, 1]}

    grid = GridPolicy()
    x0 = x0 if x0 is not None else zero(env.nx)
    xm = x0.copy()
    # Float literal so that an integer `step` does not collapse the interval
    # to zero width under Python 2 integer division.
    xM = x0.copy() + step / 10.
    for i, [vm, vM] in idxs.items():
        xm[i] = vm
        xM[i] = vM

    # The scatter grid honours `step`; it used to be pinned to 1e-2, which
    # matched `step` only at its default value.
    X0 = grid.setGrid(xm, xM, step)
    V = nets.sess.run(nets.value.policy,
                      feed_dict={nets.value.x: np.hstack([X0, 0 * X0])})
    # NOTE(review): columns 0 and 1 are always the ones plotted, whatever the
    # keys of `idxs` are -- confirm this is intended for non-default `idxs`.
    plt.scatter(X0[:, 0].flat, X0[:, 1].flat, c=V.flat, linewidths=0)

    X0 = grid.setGrid(xm, xM, step * 8)
    rollouts = nets.sess.run(nets.ptrajx.policy,
                             feed_dict={nets.ptrajx.x: np.hstack([X0, 0 * X0])})
    for x in rollouts:
        # Each row is unpacked as 20 consecutive samples of dimension env.nx.
        X = np.reshape(x, [20, env.nx])
        plt.plot(X[:, 0], X[:, 1])
plt.subplot(1,2,2) plt.plot(Xa[:,0],Xa[:,1],color=s_m.to_rgba(Ta)) plt.colorbar(s_m) plt.subplot(1,2,1) plt.title('Approximate trajectories') plt.subplot(1,2,2) plt.title('Refined trajectories') saveCurrentFigure('bundle') ''' # --- # --- # --- ''' grid.setGrid([-1.,-1.,0,0,0,0],[+1,+1,1e-3,1e-3,1e-3,1e-3],2e-2) x0s = grid.grid x1 = np.zeros(6) x1s = np.vstack( [x1]*x0s.shape[0] ) xs = np.hstack([x0s,x1s]) #fig, axes = plt.subplots(nrows=2, ncols=2) fig = plt.figure() for iplot,inet in enumerate([1,4,7,9]): plt.subplot(2,2,iplot+1) plt.xlabel('Iteration #%d'%inet) nets.load('up%02d'%inet) vs = nets.sess.run(nets.value.policy,feed_dict={nets.value.x:xs}) us = nets.sess.run(nets.ptraju.policy,feed_dict={nets.ptraju.x:xs})[:,:2]
# Script fragment (Python 2 syntax: `print` statements).  The original line
# breaks and indentation were lost, so the nesting described here is inferred
# from the statement order -- TODO confirm against the formatted source.
# Steps, in order:
#   1. Assert that checkPRM(prm.graph, True) returns 0 and print a
#      time-stamped message.
#   2. Derive RANDOM_SEED from the wall clock as int((time.time() % 10) * 1000),
#      i.e. an integer in [0, 9999]; print it and seed both numpy's and the
#      stdlib `random` generators with it.
#   3. Build GridPolicy(prm) and call setGrid with lower bound
#      concatenate(env.qlow, zero(3)), upper bound
#      concatenate(env.qup, zero(3) + EPS) and step .1, where EPS = 1e-3.
#   4. Call config(acado, 'policy') and acado.setup_async(32, 200).
#   5. If LOAD_GRID is set, load the grid from dataRootPath + '/grid.npy'.
#   6. If SAMPLE_GRID is set, call grid.sample(subsample=1, verbose=True),
#      save grid.data to dataRootPath + '/grid_sampled.npy', printing a
#      time stamp before and after.
# NOTE(review): the seed changes on every run; a run can only be reproduced
# by reusing the printed value.
assert(checkPRM(prm.graph,True)==0) print 'Done with the PRM. ',time.ctime() # --- GRID --- # --- GRID --- # --- GRID --- RANDOM_SEED = int((time.time()%10)*1000) print "Seed = %d" % RANDOM_SEED np .random.seed (RANDOM_SEED) random.seed (RANDOM_SEED) #dataRootPath = dataRootPath + '/2dgrid' grid = GridPolicy(prm) EPS = 1e-3 grid.setGrid( np.concatenate([ env.qlow, zero(3) ]), np.concatenate([ env.qup , zero(3)+EPS ]), .1 ) #grid.setGrid( np.matrix([ -1., -1., 0, 0, 0, 0 ]).T, # np.matrix([ 1., 1., 0, 0, 0, 0 ]).T+EPS, .1 ) config(acado,'policy') acado.setup_async(32,200) if LOAD_GRID: grid.load(dataRootPath+'/grid.npy') if SAMPLE_GRID: print 'Sample the grid',time.ctime() grid.sample(subsample=1,verbose=True) np.save(dataRootPath+'/grid_sampled.npy',grid.data) print 'Sampling done',time.ctime()
# Script fragment (Python 2 syntax: `print` statements).  The original line
# breaks and indentation were lost, so the nesting described here is inferred
# from the statement order -- TODO confirm against the formatted source.
# Steps, in order:
#   1. Call connectToZero(graph), densifyPrm(graph) and connexifyPrm(graph),
#      printing time-stamped messages, then prm.graph.save(dataRootPath).
#   2. Build oprm = OptimalPRM.makeFromPRM(prm, acado=prm.connect.acado,
#      stateDiff=PendulumStateDiff(2)) and wrap it in GridPolicy(oprm).
#   3. Call setGrid with lower bound [-pi, -pi + EPS, 0, 0], upper bound
#      [pi, pi - EPS, EPS, EPS] and step 1., where EPS = 1e-3.
#   4. If SAMPLE_GRID is set call grid.sample(); otherwise load the grid from
#      dataRootPath + '/grid.npy'.
#   5. If REFINE_GRID > 3, call refineGrid twice (NNEIGHBOR=30, then 100, both
#      with PERCENTAGE=.9); RANDQUEUE is the list of indices of the entries of
#      `data` whose `cost` exceeds 100.  `data` is then saved to
#      dataRootPath + '/grid_filled.npy'.
# NOTE(review): `graph` and `data` are not assigned in this fragment --
# presumably bound earlier (e.g. to prm.graph and grid.data); verify.
# NOTE(review): whether the final np.save sits inside the REFINE_GRID branch
# cannot be determined from the collapsed text.
print 'Connect all points to zero (at least tries)',time.ctime() connectToZero(graph) print 'Densify PRM',time.ctime() densifyPrm(graph) connexifyPrm(graph) prm.graph.save(dataRootPath) # --- GRID --- # --- GRID --- # --- GRID --- oprm = OptimalPRM.makeFromPRM(prm,acado=prm.connect.acado,stateDiff=PendulumStateDiff(2)) grid = GridPolicy(oprm) EPS = 1e-3 grid.setGrid([ -np.pi,-np.pi+EPS,0,0],[np.pi,np.pi-EPS,EPS,EPS],1.) if SAMPLE_GRID: print 'Sample the grid',time.ctime() grid.sample() else: grid.load(dataRootPath+'/grid.npy') if REFINE_GRID>3: print 'Fill the grid',time.ctime() refineGrid(data,NNEIGHBOR=30,PERCENTAGE=.9, RANDQUEUE=[ i for i,d in enumerate(data) if d.cost>100]) refineGrid(data,NNEIGHBOR=100,PERCENTAGE=.9, RANDQUEUE=[ i for i,d in enumerate(data) if d.cost>100]) np.save(dataRootPath+'/grid_filled.npy',data)