def routine_gen():
    """Load a saved suspension example system, optimize it by policy
    gradient, then re-optimize a noise-free copy and compare the costs.

    NOTE(review): relies on project helpers (load_system, check_olmss,
    set_initial_gains, policy_gradient_setup, run_policy_gradient,
    calc_comparison_costs, pickle_export, plot_results) defined elsewhere.
    """
    # SS = gen_system_example_suspension()
    timestr = '1558459899p686552_example_suspension_model_known'
    folderstr = 'example_systems'
    SS1 = load_system(folderstr, timestr)
    check_olmss(SS1)

    # Policy gradient setup
    t_start = time()
    K0_method = 'are_perturbed'
    K0 = set_initial_gains(SS1, K0_method=K0_method)
    PGO = policy_gradient_setup(SS1)
    filename_out = 'policy_gradient_options.pickle'
    path_out = os.path.join(SS1.dirname, filename_out)
    pickle_export(SS1.dirname, path_out, PGO)
    t_end = time()
    print('Initialization completed after %.3f seconds' % (t_end - t_start))

    SS1, histlist1 = run_policy_gradient(SS1, PGO)

    # Find optimal control ignoring noise: zero the multiplicative-noise
    # terms on a copy and re-optimize from the same initial gains.
    SS2 = copy(SS1)
    SS2.set_a(np.zeros_like(SS2.a))
    SS2.set_b(np.zeros_like(SS2.b))
    SS2.setK(K0)
    PGO.eta = 1e-5
    PGO.epsilon = (1e-1) * SS2.Kare.size
    SS2, histlist2 = run_policy_gradient(SS2, PGO)

    # Evaluate the noise-ignorant optimized gains on the noisy system SS1.
    # BUGFIX(review): the original also called SS1.setK(SS2.Kare) here, a
    # dead store immediately overwritten by the call below; removed.
    SS1.setK(SS2.K)

    dirname_in = os.path.join(folderstr, timestr)
    chist_data = calc_comparison_costs(SS1, SS2, histlist1, histlist2)
    # Strings are immutable; the original's copy(dirname_in) was a no-op.
    dirname_out = dirname_in
    filename_out = 'chist_data.pickle'
    path_out = os.path.join(dirname_out, filename_out)
    pickle_export(dirname_out, path_out, chist_data)
    plot_results(SS1, SS2, chist_data, dirname_in)
SS.setK(np.zeros([SS.m, SS.n])) # Policy gradient options PGO = PolicyGradientOptions(epsilon=(1e-2) * SS.Kare.size, eta=1e-3, max_iters=1000, disp_stride=1, keep_hist=True, opt_method='proximal', keep_opt='last', step_direction='gradient', stepsize_method='constant', exact=True, regularizer=Regularizer('vec1'), regweight=1.0, stop_crit='gradient', fbest_repeat_max=0, display_output=True, display_inplace=True, slow=False) # Run (regularized) policy gradient run_policy_gradient(SS, PGO) # Print the regularized optimal gains (from proximal gradient optimization) # and the unregularized optimal gains (from solving a Riccati equation) print('Optimized sparse gains') print(SS.K) print('Riccati gains') print(SS.Kare)
def routine_gen():
    """Monte Carlo comparison of policy gradient step directions.

    Generates nSS random multiplicative-noise systems, runs policy gradient
    with each of three step directions from zero initial gains, collects
    normalized cost and gradient-norm histories, then pickles and plots the
    aggregate results.
    """
    folderstr = 'systems'
    timestr = str(time()).replace('.', 'p')
    dirname_in = os.path.join(folderstr, timestr)
    create_directory(dirname_in)

    nSS = 20  # Number of independent runs/systems

    step_names = ('gradient', 'natural_gradient', 'gauss_newton')
    all_dict = {name: {'costnorm': [], 'gradnorm': []} for name in step_names}

    for _ in range(nSS):
        SS = gen_system_mult(n=10,
                             m=10,
                             safety_margin=0.3,
                             noise='olmss_weak',
                             mult_noise_method='random',
                             SStype='random',
                             seed=-1,
                             saveSS=False)

        # Policy gradient setup: start every method from the same zero gains.
        K0_method = 'zero'
        K0 = set_initial_gains(SS, K0_method=K0_method)

        # Per-method step size and iteration budget.
        step_direction_dict = {
            'gradient': {'eta': 1e-5, 'max_iters': 5000},
            'natural_gradient': {'eta': 1e-4, 'max_iters': 2000},
            'gauss_newton': {'eta': 1 / 2, 'max_iters': 10},
        }

        sleep(1)
        for step_direction, params in step_direction_dict.items():
            SS.setK(K0)
            t_start = time()
            PGO = policy_gradient_setup(SS, params['eta'], step_direction,
                                        params['max_iters'])
            t_end = time()
            print('Initialization completed after %.3f seconds'
                  % (t_end - t_start))

            SS, histlist = run_policy_gradient(SS, PGO)

            # Normalized excess cost and Frobenius norm of gradient history.
            costnorm = (histlist[2] / SS.ccare) - 1
            gradnorm = la.norm(histlist[1], ord='fro', axis=(0, 1))
            all_dict[step_direction]['costnorm'].append(costnorm)
            all_dict[step_direction]['gradnorm'].append(gradnorm)

    filename_out = 'monte_carlo_all_dict.pickle'
    path_out = os.path.join(dirname_in, filename_out)
    pickle_export(dirname_in, path_out, all_dict)
    plot_data(all_dict, dirname_in)
def routine_gen():
    """Compare policy gradient step directions (including a model-free
    gradient variant) under backtracking line search on a saved example
    network system, then pickle and plot the aggregate results.
    """
    # folderstr = 'systems'
    # timestr = str(time()).replace('.','p')
    # dirname_in = os.path.join(folderstr,timestr)
    # create_directory(dirname_in)

    nSS = 20  # Number of independent runs

    # Settings for backtracking line search
    stepsize_method = 'backtrack'
    nr = 100000

    # Per-method step size, iteration budget, and exact/model-free flag.
    PGO_dict = {
        'gradient_model_free': {'eta': 1e-1, 'max_iters': 20, 'exact': False},
        'gradient': {'eta': 1e-1, 'max_iters': 20, 'exact': True},
        'natural_gradient': {'eta': 1e-1, 'max_iters': 20, 'exact': True},
        'gauss_newton': {'eta': 1 / 2, 'max_iters': 20, 'exact': True},
    }
    all_dict = {key: {'costnorm': [], 'gradnorm': []} for key in PGO_dict}

    # Generate system from scratch (alternative: gen_system_erdos_renyi with
    # n=4 or n=2, diffusion_constant=1.0, leakiness_constant=0.1,
    # time_constant=0.05, leaky=True, seed=seed — disabled in the original).
    seed = 1

    # Load a previously saved system instead.
    folderstr = 'example_systems'
    timestr = '1587086073p9661696_example_network_all_steps_backtrack'
    dirname_in = os.path.join(folderstr, timestr)
    filename_in = os.path.join(dirname_in, 'system_init.pickle')
    SS = pickle_import(filename_in)

    for _ in range(nSS):
        # Policy gradient setup: start every method from the same zero gains.
        K0_method = 'zero'
        K0 = set_initial_gains(SS, K0_method=K0_method)

        sleep(0.5)
        for step_direction, opts in PGO_dict.items():
            SS.setK(K0)
            t_start = time()
            PGO = policy_gradient_setup(SS, opts['eta'], step_direction,
                                        opts['max_iters'], opts['exact'],
                                        stepsize_method, nr)
            t_end = time()
            print('Initialization completed after %.3f seconds'
                  % (t_end - t_start))

            SS, histlist = run_policy_gradient(SS, PGO)

            # Normalized excess cost and Frobenius norm of gradient history.
            costnorm = (histlist[2] / SS.ccare) - 1
            gradnorm = la.norm(histlist[1], ord='fro', axis=(0, 1))
            all_dict[step_direction]['costnorm'].append(costnorm)
            all_dict[step_direction]['gradnorm'].append(gradnorm)

    filename_out = 'monte_carlo_all_dict.pickle'
    path_out = os.path.join(dirname_in, filename_out)
    pickle_export(dirname_in, path_out, all_dict)
    plot_data(all_dict, dirname_in)
def traverse_sparsity(SS, PGO, optiongroup, optiongroup_dir):
    """Sweep the regularization weight upward, re-running policy gradient
    at each step, to trace a sparsity path for the gain matrix.

    Each iteration pickles the optimized system, renders sparsity-pattern
    images, and records [regweight, sparsity, K, c, elapsed, hist_list].
    Stops when sparsity exceeds sparsity_required, when sparsity starts
    decreasing after exceeding 0.60, or after iterc_max iterations.
    Mutates PGO (eta, regweight) in place and returns the collected
    sparsity_data list.
    """
    # Sparsity traversal settings
    sparsity_required = 0.95  # fraction-of-zero-entries target
    sparse_thresh = 0.001  # magnitude below which a gain entry counts as zero
    plt.ioff()  # batch rendering; figures are saved, not shown
    img_folder = 'sparsity_images'
    img_dirname_out = os.path.join(optiongroup_dir, img_folder)
    create_directory(img_dirname_out)

    # Baseline image: sparsity pattern of the (dense) Riccati gains.
    filename_out_pre = 'sparsity_are'
    plot_sparse(img_dirname_out,
                filename_out_pre,
                SS.Kare,
                SS.ccare,
                sparse_thresh,
                PGO,
                are_flag=True)

    # Geometric growth factor for the regularization weight each iteration.
    regweight_ratio = np.sqrt(2)
    if optiongroup == 'proximal_gradient_GN_PI':
        eta_ratio = 1
    else:
        eta_ratio = (1 / regweight_ratio)**np.sqrt(
            regweight_ratio)  # empirically works well

    sparsity_data = []
    img_pattern = 'sparsity%02d'
    iterc = 0
    iterc_max = 18
    stop = False
    sparsity_prev = 0
    sparsity_max = 0
    while not stop:
        # Policy gradient at the current regularization weight
        t_start = time()
        SS, hist_list = run_policy_gradient(SS, PGO)
        t_end = time()

        # Snapshot the optimized system for this regweight.
        # NOTE(review): the replace below also turns the '.pickle' extension
        # into 'ppickle' — looks like it was meant only for the decimal
        # point in regweight; confirm before changing, since downstream
        # loaders may expect the mangled name.
        filename_out = 'system_%d_regweight_%.3f.pickle' % (iterc,
                                                            PGO.regweight)
        filename_out = filename_out.replace('.', 'p')
        path_out = os.path.join(optiongroup_dir, filename_out)
        pickle_export(optiongroup_dir, path_out, SS)

        # Plotting: render the current gain sparsity pattern and measure it
        filename_out_pre = img_pattern % iterc
        ax_im, ax_im_bw, ax_hist, img, img_bw, cbar, sparsity = plot_sparse(
            img_dirname_out, filename_out_pre, SS.K, SS.c, sparse_thresh,
            PGO)
        plt.close('all')  # free figure memory between iterations

        sparsity_data.append(
            [PGO.regweight, sparsity, SS.K, SS.c, t_end - t_start,
             hist_list])

        # Stopping conditions (any one suffices)
        if sparsity > sparsity_required:
            stop = True
        # if sparsity < sparsity_prev:
        #     stop = True
        # if sparsity_max > 0.60 and sparsity < 0.05:
        #     stop = True
        if sparsity_max > 0.60 and sparsity < sparsity_prev:
            stop = True  # sparsity regressed after getting substantially sparse
        if iterc >= iterc_max - 1:
            stop = True  # iteration budget exhausted

        # Advance the sweep: shrink step size as regweight grows.
        PGO.eta *= eta_ratio
        PGO.regweight *= regweight_ratio
        sparsity_prev = sparsity
        sparsity_max = np.max([sparsity, sparsity_max])
        iterc += 1
        # input("Press [enter] to continue.")

    # vidname = 'sparsity_evolution'
    # vidsave(img_folder,img_pattern,vidname)

    filename_out = 'sparsity_data.pickle'
    path_out = os.path.join(optiongroup_dir, filename_out)
    pickle_export(optiongroup_dir, path_out, sparsity_data)
    return sparsity_data