import numpy as np

from ltimult import LQRSysMult
# NOTE: PolicyGradientOptions is assumed to be provided by the accompanying
# policy gradient module of this repository; its import statement is not shown
# in this excerpt.


if __name__ == "__main__":
    # System problem data
    A = np.array([[0.7, 0.3, 0.2],
                  [-0.2, 0.4, 0.5],
                  [-0.4, 0.2, -0.3]])
    B = np.array([[0.5, -0.3],
                  [0.8, 0.3],
                  [0.1, 0.9]])
    Q = np.eye(3)
    R = np.eye(2)
    S0 = np.eye(3)

    Aa = 0.1 * np.array([[2, 9, -6],
                         [9, 9, 4],
                         [-9, -2, 5]])
    Aa = Aa[:, :, np.newaxis]
    Bb = 0.1 * np.array([[8, 8],
                         [3, 3],
                         [-6, 6]])
    Bb = Bb[:, :, np.newaxis]
    a = np.array([[0.1]])
    b = np.array([[0.1]])

    SS = LQRSysMult(A, B, a, Aa, b, Bb, Q, R, S0)

    # Start with an initially stabilizing (feasible) controller;
    # for this example the system is open-loop mean-square stable
    SS.setK(np.zeros([SS.m, SS.n]))

    # Policy gradient options
    PGO = PolicyGradientOptions(epsilon=(1e-2) * SS.Kare.size,
                                eta=1e-3,
                                max_iters=1000,
                                disp_stride=1,
                                keep_hist=True,
                                opt_method='proximal',
                                keep_opt='last',
                                step_direction='gradient',
                                stepsize_method='constant',
def gen_system_mult(n=8, m=8, safety_margin=0.3, noise='weak',
                    mult_noise_method='random', SStype='ER',
                    seed=None, saveSS=True):
    timestr = str(time()).replace('.', 'p')
    dirname_out = os.path.join('systems', timestr)

    if seed is not None:
        set_rng_seed(seed)

    if SStype == 'random':
        A, B = gen_system_AB_rand(n, m, safety_margin)
    elif SStype == 'ER':
        A, B = gen_system_AB_erdos_renyi(n, dirname_out=dirname_out)
        m = B.shape[1]
    elif SStype == 'example':
        A = np.array([[0.8, 0.3], [-0.2, 0.7]])
        B = np.array([[0.5, 0.3]]).T
        Q = np.eye(2)
        R = np.eye(1)
        S0 = np.eye(2)
        Aa = np.array([[0.2, 0.3], [0.2, 0.3]])
        Aa = Aa[:, :, np.newaxis]
        Bb = np.array([[0.2, 0.3]]).T
        Bb = Bb[:, :, np.newaxis]
        a = np.array([[0.3]])
        b = np.array([[0.3]])
        SS = LQRSysMult(A, B, a, Aa, b, Bb, Q, R, S0)
        SS.dirname = dirname_out
        filename_only = 'system_init.pickle'
        filename_out = os.path.join(dirname_out, filename_only)
        pickle_export(dirname_out, filename_out, SS)
        return SS

    # LQR cost matrices
    Q = np.eye(n)
    # Q = randn(n,n)
    # Q = np.dot(Q,Q')
    R = np.eye(m)
    # R = randn(m,m)
    # R = np.dot(R,R')

    # Initial state distribution covariance
    # S0 = randn(n,n)
    # S0 = np.dot(S0,S0')
    S0 = np.eye(n)

    # Multiplicative noise data
    p = 2  # Number of multiplicative noises on A
    q = 2  # Number of multiplicative noises on B
    if mult_noise_method == 'random':
        Aa = randn(n, n, p)
        Bb = randn(n, m, q)
    elif mult_noise_method == 'rowcol':
        # Pick a random row and column
        Aa = np.zeros([n, n, p])
        Bb = np.zeros([n, m, q])
        Aa[randint(n), :, 0] = np.ones(n)
        Aa[:, randint(n), 1] = np.ones(n)
        Bb[randint(n), :, 0] = np.ones(m)
        Bb[:, randint(m), 1] = np.ones(n)
    elif mult_noise_method == 'random_plus_rowcol':
        Aa = 0.3*randn(n, n, p)
        Bb = 0.3*randn(n, m, q)
        # Pick a random row and column
        Aa[randint(n), :, 0] = np.ones(n)
        Aa[:, randint(n), 1] = np.ones(n)
        Bb[randint(n), :, 0] = np.ones(m)
        Bb[:, randint(m), 1] = np.ones(n)

    incval = 1.05
    decval = 1.00*(1/incval)
    weakval = 0.90

    # a = randn([p,1])
    # b = randn([q,1])
    a = np.ones([p, 1])
    b = np.ones([q, 1])
    a = a*(float(1)/(p*n**2))  # scale as rough heuristic
    b = b*(float(1)/(q*m**2))  # scale as rough heuristic

    # noise = 'weak'
    if noise == 'weak' or noise == 'critical':
        # Ensure near-critically mean square stabilizable
        # increase noise if not
        P, Kare = dare_mult(A, B, a, Aa, b, Bb, Q, R, show_warn=False)
        mss = True
        while mss:
            if Kare is None:
                mss = False
            else:
                a = incval*a
                b = incval*b
                P, Kare = dare_mult(A, B, a, Aa, b, Bb, Q, R, show_warn=False)
        # Extra mean square stabilizability margin
        a = decval*a
        b = decval*b
        if noise == 'weak':
            # print('Multiplicative noise set weak')
            a = weakval*a
            b = weakval*b
    elif noise == 'olmss_weak' or noise == 'olmss_critical':
        # Ensure near-critically open-loop mean-square stable
        # increase noise if not
        K0 = np.zeros([m, n])
        P = dlyap_mult(A, B, K0, a, Aa, b, Bb, Q, R, S0, matrixtype='P')
        mss = True
        while mss:
            if P is None:
                mss = False
            else:
                a = incval*a
                b = incval*b
                P = dlyap_mult(A, B, K0, a, Aa, b, Bb, Q, R, S0, matrixtype='P')
        # Extra mean square stabilizability margin
        a = decval*a
        b = decval*b
        if noise == 'olmss_weak':
            # print('Multiplicative noise set to open-loop mean-square stable')
            a = weakval*a
            b = weakval*b
    elif noise == 'olmsus':
        # Ensure near-critically open-loop mean-square unstable
        # increase noise if not
        K0 = np.zeros([m, n])
        P = dlyap_mult(A, B, K0, a, Aa, b, Bb, Q, R, S0, matrixtype='P')
        mss = True
        while mss:
            if P is None:
                mss = False
            else:
                a = incval*a
                b = incval*b
                P = dlyap_mult(A, B, K0, a, Aa, b, Bb, Q, R, S0, matrixtype='P')
        # # Extra mean square stabilizability margin
        # a = decval*a
        # b = decval*b
        # print('Multiplicative noise set to open-loop mean-square unstable')
    elif noise == 'none':
        print('MULTIPLICATIVE NOISE SET TO ZERO!!!')
        a = np.zeros([p, 1])  # For testing only - no noise
        b = np.zeros([q, 1])  # For testing only - no noise
    else:
        raise Exception('Invalid noise setting chosen')

    SS = LQRSysMult(A, B, a, Aa, b, Bb, Q, R, S0)

    if saveSS:
        SS.dirname = dirname_out
        filename_only = 'system_init.pickle'
        filename_out = os.path.join(dirname_out, filename_only)
        pickle_export(dirname_out, filename_out, SS)

    return SS
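# Usage sketch (illustrative only, not part of the original script): generate a
# near-critically noisy Erdos-Renyi networked system and start from the zero
# gain, as in the example block above. The keyword values simply restate the
# defaults of gen_system_mult.
#
#   SS = gen_system_mult(n=8, noise='weak', mult_noise_method='random',
#                        SStype='ER', seed=1, saveSS=False)
#   SS.setK(np.zeros([SS.m, SS.n]))   # zero gain as an initial policy
#   print(SS.c)                       # cost of the current gain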
def gradient_estimate_variance(noise, textfile, seed=1):
    npr.seed(seed)

    # Generate the system
    # Two states, diffusion w/ friction and multiplicative noise
    n = 2
    m = 1
    A = np.array([[0.8, 0.1], [0.1, 0.8]])
    B = np.array([[1.0], [0.0]])
    a = np.array([[0.1]])
    Aa = np.array([[0.0, 1.0], [1.0, 0.0]])[:, :, np.newaxis]
    b = np.array([[0.0]])
    Bb = np.array([[0.0], [0.0]])[:, :, np.newaxis]
    Q = np.eye(2)
    R = np.eye(1)
    S0 = np.eye(2)
    if noise:
        SS = LQRSysMult(A, B, a, Aa, b, Bb, Q, R, S0)
    else:
        SS = LQRSys(A, B, Q, R, S0)

    # Initialize
    # K0 = 0.01*np.ones([m,n])
    K0 = np.zeros([m, n])
    SS.setK(K0)
    K = np.copy(SS.K)
    print(SS.c)

    # Number of gradient estimates to collect for variance analysis
    n_iterc = 10

    # Rollout length
    nt = 40

    # Number of rollouts
    nr = int(1e4)

    # Exploration radius
    ru = 1e-2

    G_est_all = np.zeros([n_iterc, m, n])
    error_angle_all = np.zeros(n_iterc)
    error_scale_all = np.zeros(n_iterc)
    error_norm_all = np.zeros(n_iterc)

    headerstr_list = []
    headerstr_list.append('  trial  ')
    headerstr_list.append('error angle (deg)')
    headerstr_list.append(' error scale ')
    headerstr_list.append(' error norm')
    headerstr_list.append('true gradient norm')
    headerstr = " | ".join(headerstr_list)
    printout(headerstr, textfile)

    t_start = time()
    for iterc in range(n_iterc):
        # Estimate gradient using zeroth-order optimization
        # Draw random gain deviations and scale to Frobenius norm ball
        Uraw = npr.normal(size=[nr, SS.m, SS.n])
        U = ru * Uraw / la.norm(Uraw, 'fro', axis=(1, 2))[:, None, None]

        # Stack the perturbed gain matrices into a 3D array
        Kd = K + U

        # Simulate all rollouts together
        c = np.zeros(nr)

        # Draw random initial states
        x = npr.multivariate_normal(np.zeros(SS.n), SS.S0, nr)
        for t in range(nt):
            # Accumulate cost
            c += np.einsum('...i,...i', x, np.einsum('jk,...k', SS.QK, x))

            # Calculate closed-loop dynamics
            AKr = SS.A + np.einsum('...ik,...kj', SS.B, Kd)
            if noise:
                for i in range(SS.p):
                    AKr += ((SS.a[i]**0.5) * npr.randn(nr)[:, np.newaxis, np.newaxis]
                            * np.repeat(SS.Aa[np.newaxis, :, :, i], nr, axis=0))
                for j in range(SS.q):
                    AKr += np.einsum('...ik,...kj',
                                     (SS.b[j]**0.5) * npr.randn(nr)[:, np.newaxis, np.newaxis]
                                     * np.repeat(SS.Bb[np.newaxis, :, :, j], nr, axis=0),
                                     Kd)

            # Transition the state
            x = np.einsum('...jk,...k', AKr, x)

        # Estimate gradient
        Glqr = np.einsum('i,i...', c, U)
        Glqr *= K.size / (nr * (ru**2))
        G_est = Glqr

        G_act = SS.grad
        error_angle = (360 / (2 * np.pi)) * np.arccos(
            np.sum(G_est * G_act) / (la.norm(G_est) * la.norm(G_act)))
        error_scale = la.norm(G_est) / la.norm(G_act)
        error_norm = la.norm(G_est - G_act)

        G_est_all[iterc] = G_est
        error_angle_all[iterc] = error_angle
        error_scale_all[iterc] = error_scale
        error_norm_all[iterc] = error_norm

        # Print iterate messages
        printstrlist = []
        printstrlist.append("{0:9d}".format(iterc + 1))
        printstrlist.append(" {0:6.2f} / 360".format(error_angle))
        printstrlist.append("{0:8.4f} / 1".format(error_scale))
        printstrlist.append("{0:9.4f}".format(error_norm))
        printstrlist.append("{0:9.4f}".format(la.norm(G_act)))
        printstr = ' | '.join(printstrlist)
        printout(printstr, textfile)
    t_end = time()

    printout('', textfile)
    printout('mean of error angle', textfile)
    printout('%f' % np.mean(error_angle_all), textfile)
    printout('mean of error scale', textfile)
    printout('%f' % np.mean(error_scale_all), textfile)
    printout('mean of error norm', textfile)
    printout('%f' % np.mean(error_norm_all), textfile)
    # printout('standard deviation of raw gradient estimate, entrywise', textfile)
    # printout('%f' % np.std(G_est_all, 0), textfile)
    printout('average time per gradient estimate (s)', textfile)
    printout("%.3f" % ((t_end - t_start) / n_iterc), textfile)
    printout('', textfile)

    return G_act, G_est_all, error_angle_all, error_scale_all, error_norm_all
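# Usage sketch (illustrative only; the output filename is a placeholder and the
# assumption that printout accepts the textfile argument in this form is not
# confirmed by this excerpt):
#
#   G_act, G_est_all, angles, scales, norms = gradient_estimate_variance(
#       noise=True, textfile='gradient_estimate_variance_output.txt', seed=1)
#
# Increasing nr (number of rollouts) or nt (rollout length) inside the function
# should reduce the variance of the zeroth-order gradient estimates at the cost
# of longer simulation time.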
def gen_system_example_suspension():
    n = 4
    m = 1
    m1 = 500
    m2 = 100
    k1 = 5000
    k2 = 20000
    b1 = 200
    b2 = 4000
    A = np.array([[0, 1, 0, 0],
                  [-(b1*b2)/(m1*m2), 0, ((b1/m1)*((b1/m1)+(b1/m2)+(b2/m2)))-(k1/m1), -(b1/m1)],
                  [b2/m2, 0, -((b1/m1)+(b1/m2)+(b2/m2)), 1],
                  [k2/m2, 0, -((k1/m1)+(k1/m2)+(k2/m2)), 0]])
    B = 1000*np.array([[0],
                       [1/m1],
                       [0],
                       [(1/m1)+(1/m2)]])
    C = np.eye(n)
    D = np.zeros([n, m])
    sysc = (A, B, C, D)
    sysd = scipy.signal.cont2discrete(sysc, dt=0.5, method='bilinear')
    A = sysd[0]
    B = sysd[1]

    # Multiplicative noise data
    p = 4
    q = 1
    a = 0.1*np.ones(p)
    b = 0.2*np.ones(q)
    Aa = np.zeros([n, n, p])
    for i in range(p):
        Aa[:, i, i] = np.ones(n)
    Bb = np.zeros([n, m, q])
    for j in range(q):
        Bb[:, j, j] = np.ones(n)

    Q = np.eye(n)
    R = np.eye(m)
    S0 = np.eye(n)

    # Ensure the system is mean-square stabilizable - decrease noise until it is
    mss = False
    SS = LQRSysMult(A, B, a, Aa, b, Bb, Q, R, S0)
    while not mss:
        if SS.ccare < np.inf:
            mss = True
        else:
            a = a*0.95
            b = b*0.95
            SS = LQRSysMult(A, B, a, Aa, b, Bb, Q, R, S0)

    timestr = str(time()).replace('.', 'p')
    dirname_out = os.path.join('systems', timestr)
    SS.dirname = dirname_out
    filename_only = 'system_init.pickle'
    filename_out = os.path.join(dirname_out, filename_only)
    pickle_export(dirname_out, filename_out, SS)
    return SS
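# Usage sketch (illustrative, not part of the original file): build the
# discretized suspension example and inspect the noise levels that remain after
# the stabilizability-driven noise reduction loop above.
#
#   SS = gen_system_example_suspension()
#   print(SS.a, SS.b)   # multiplicative noise variances actually used
#   print(SS.ccare)     # optimal closed-loop cost, finite by construction here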
def gen_system_erdos_renyi(n,
                           diffusion_constant=1.0,
                           leakiness_constant=0.1,
                           time_constant=0.05,
                           leaky=True,
                           seed=None,
                           detailed_outputs=False,
                           dirname_out='.'):
    npr.seed(seed)
    timestr = str(time()).replace('.', 'p')
    dirname_out = os.path.join('systems', timestr)

    # ER probability
    # crp = 7.0
    # erp = (np.log(n+1)+crp)/(n+1)  # almost surely connected prob=0.999
    mean_degree = 4.0  # should be > 1 for giant component to exist
    erp = mean_degree / (n - 1.0)

    n_edges = 0

    # Create random Erdos-Renyi graph
    # Adjacency matrix
    adjacency = np.zeros([n, n])
    for i in range(n):
        for j in range(i + 1, n):
            if npr.rand() < erp:
                n_edges += 1
                adjacency[i, j] = npr.randint(low=1, high=4)
                adjacency[j, i] = np.copy(adjacency[i, j])

    # Degree matrix
    degree = np.diag(adjacency.sum(axis=0))

    # Graph Laplacian
    laplacian = degree - adjacency

    # Continuous-time dynamics matrices
    Ac = -laplacian * diffusion_constant
    Bc = np.eye(n) / time_constant  # normalize just to make B = np.eye(n) later in discrete-time
    if leaky:
        Fc = leakiness_constant * np.eye(n)
        Ac = Ac - Fc

    # Plot
    visualize_graph_ring(adjacency, n, dirname_out)

    # Forward Euler discretization
    A = np.eye(n) + Ac * time_constant
    B = Bc * time_constant
    n = np.copy(n)
    m = np.copy(n)

    # Multiplicative noises
    a = 0.005 * npr.randint(low=1, high=5, size=n_edges) * np.ones(n_edges)
    Aa = np.zeros([n, n, n_edges])
    k = 0
    for i in range(n):
        for j in range(i + 1, n):
            if adjacency[i, j] > 0:
                Aa[i, i, k] = 1
                Aa[j, j, k] = 1
                Aa[i, j, k] = -1
                Aa[j, i, k] = -1
                k += 1

    b = 0.05 * npr.randint(low=1, high=5, size=n) * np.ones(n)
    Bb = np.zeros([n, m, m])
    for i in range(n):
        Bb[i, i, i] = 1

    Q = np.eye(n)
    R = np.eye(m)
    S0 = np.eye(n)

    SS = LQRSysMult(A, B, a, Aa, b, Bb, Q, R, S0)

    SS.dirname = dirname_out
    filename_only = 'system_init.pickle'
    filename_out = os.path.join(dirname_out, filename_only)
    pickle_export(dirname_out, filename_out, SS)
    return SS
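# Usage sketch (illustrative only): generate a leaky-diffusion network system on
# a random Erdos-Renyi graph; fixing the seed makes the sampled graph and the
# multiplicative noise levels reproducible.
#
#   SS = gen_system_erdos_renyi(n=10, seed=1)
#   print(SS.n, SS.m)   # state and input dimensions (equal for this construction)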