def get_Model_from_COO(coo_filename, rs_filename, filename='Transition_dict', n_actions=1, nt=None, dt=None, F=None, startpos=None, endpos=None):
    """Turn previously computed COO / reward data into a transition dictionary and save it.

    The grid comes from ``setup_grid(num_actions=n_actions)``.  An empty
    dictionary is created by ``initialise_dict`` and filled by
    ``convert_COO_to_dict``, to which ``coo_filename`` and ``rs_filename`` are
    forwarded unchanged.  The result is handed to ``write_files`` together with
    the grid parameters, the grid's reward structure and the build time.

    Nothing is built when ``DP/Trans_matxs/<filename><n_actions>a`` (relative
    to the current working directory) already exists; a message is printed and
    the function returns early.

    Side effects:
        Rebinds the module globals ``base_path`` and ``save_path`` on every
        call, and ``state_list`` when the model is actually built.

    ``nt``, ``dt``, ``F``, ``startpos`` and ``endpos`` are accepted but not used
    by this function.

    Returns:
        None.
    """
    global state_list, base_path, save_path

    print("Building Model")
    clock_start = time.time()

    # Only the grid object and its parameter descriptions are used below; the
    # remaining values returned by setup_grid are unpacked but ignored.
    (g, _xs, _ys, _X, _Y, _vx_rzns, _vy_rzns, _num_rzns, _path_mat,
     params, param_str) = setup_grid(num_actions=n_actions)

    # Output name is "<filename><n_actions>a".
    filename = ''.join((filename, str(n_actions), 'a'))
    base_path = join(getcwd(), 'DP/Trans_matxs/')
    save_path = ''.join((base_path, filename))

    # Never overwrite an existing model.
    if exists(save_path):
        print("Folder Already Exists !!")
        return

    # Build the probability transition dictionary.
    state_list = g.ac_state_space()
    transition_dict = convert_COO_to_dict(initialise_dict(g), g, coo_filename, rs_filename)
    build_time = time.time() - clock_start

    # Save the dictionary together with its metadata.
    data = (params, param_str, g.reward_structure, build_time)
    write_files(transition_dict, filename, data)
    total_time = time.time() - clock_start

    # Command line outputs (times reported in minutes).
    print("Dictionary saved !")
    print("Build Time = ", build_time / 60, " mins")
    print("Total TIme = ", total_time / 60, "mins")
print("In Runner: Executing QL Finished !!") # # employ argparse to run code from commanline # parser = argparse.ArgumentParser(description='Take parameters as input args.') # # parser.add_argument('num_passes', type=int, help='number of passes for learning data from trajectories') # # parser.add_argument('QL_Iters', type=int, help='number of QL iters in regfiniement phase') # # parser.add_argument('eps0_list', metavar='eps0_list', type=float, nargs='+',help='eps0_list') # # parser.add_argument('eps_dec_method', type=int, help='1= dec to 0.05 eps0; 2= dec to 0.5eps0') # # parser.add_argument('N_inc', type=float, help='increment parameter for Nsa') # parser.add_argument('dt_size', type=int, help='training data size 0-5000') # args = parser.parse_args() # Training_traj_size_list, ALPHA_list, esp0_list, QL_Iters, init_Q, with_guidance = QL_params setup_grid_params = setup_grid(num_actions=16) model_file = 'DG_model_2500_train_id_list_1_3D_60nT_a16' g, xs, ys, X, Y, vel_field_data, nmodes, useful_num_rzns, paths, params, param_str = setup_grid_params # Paramerers for QL #Traing data size Training_traj_size_list = [len(train_id_list)] # ALPHA_list = [0.05, 0.5, 1] ALPHA_list = [0.05] esp0_list = [0.1] # esp0_list = [0.33, 0.66, 1] # esp0_list = args.eps0_list
# print("mean= ", summ/cnt)
# print("cnt = ", cnt)
# print("pfail or badcount% = ", cnt/n)

import time
import numpy as np
from utils.custom_functions import picklePolicy, calc_mean_and_std, read_pickled_File
import pickle
from utils.plot_functions import plot_exact_trajectory_set, plot_learned_policy, plot_exact_trajectory_set_DP
from definition import ROOT_DIR
import math
from utils.setup_grid import setup_grid
from os.path import join

# Build the 16-action grid and unpack everything setup_grid returns.
g, xs, ys, X, Y, vel_field_data, nmodes, useful_num_rzns, paths, params, param_str = setup_grid(num_actions=16)

# Experiment directory: the pickled policy is read from here and the
# trajectory plots are written back to the same place.
# rel_path = 'Experiments/106/DP'
rel_path = 'Experiments/26/DP'
exp_num_case_dir = join(ROOT_DIR, rel_path)
output_path = exp_num_case_dir

# Ids 2500..2549 (inclusive) are handed to the plotting helper as the test set.
test_id_list = list(range(2500, 2550))

policy = read_pickled_File(join(exp_num_case_dir, 'policy'))
t_list_all, t_list_reached, G_list, bad_count_tuple = plot_exact_trajectory_set_DP(
    g, policy, X, Y, vel_field_data, test_id_list, output_path,
    fname='Test_Traj_set' + '_')
# Only the first entry of the returned tuple is kept as the bad count.
badcount = bad_count_tuple[0]

# Plot Policy
print("plot policy")
def build_sparse_transition_model(filename='Transition_dict', n_actions=1, nt=None, dt=None, F=None, startpos=None, endpos=None, Test_grid=False):
    """Build the sparse transition model on the GPU and save it as a dictionary.

    Steps:
      1. Create the grid with ``setup_grid(num_actions=n_actions, nt=nt,
         Test_grid=Test_grid)``.
      2. Return early (after printing a message) if
         ``ROOT_DIR/DP/Trans_matxs/<filename><prefix><n_actions>`` exists,
         where ``prefix`` is ``'3D_<nT>nT_a'`` for ``nT > 1`` and ``'2D_a'``
         otherwise.
      3. For every timestep ``T`` in ``range(nT)`` copy the velocity
         realisations to the device and call
         ``build_sparse_transition_model_at_T``; results of later timesteps
         are merged with ``concatenate_results_across_time``.
      4. Convert the accumulated COO / reward lists with
         ``convert_COO_to_dict`` and save them with ``write_files``.

    Args:
        filename: Base name of the output; prefix and action count are appended.
        n_actions: Number of actions, forwarded to ``setup_grid``.
        nt: Forwarded to ``setup_grid``.
        dt, F: Ignored as inputs; both are overwritten with the values found
            in ``setup_params`` returned by ``setup_grid``.
        startpos, endpos: Accepted but not used by this function.
        Test_grid: Forwarded to ``setup_grid``.

    Side effects:
        Rebinds the module globals ``base_path`` and ``save_path`` on every
        call, and ``state_list`` when the model is built.

    Returns:
        None.

    Raises:
        ValueError: if the number of timesteps reported by ``setup_grid`` is
            smaller than 1 (there would be nothing to convert or save).
    """
    global state_list
    global base_path
    global save_path

    print("Building Sparse Model")

    # setup grid
    g, xs, ys, X, Y, Vx_rzns, Vy_rzns, num_rzns, path_mat, setup_params, setup_param_str = setup_grid(
        num_actions=n_actions, nt=nt, Test_grid=Test_grid)

    # setup_params = [num_actions, nt, dt, F, startpos, endpos]  (reference from setup_grid)
    num_actions = setup_params[0]
    nT = setup_params[1]  # total no. of time steps  TODO: check default value
    dt = setup_params[2]
    F = setup_params[3]

    # 0 is false, any other number is true.  is_stationary = 0 (false) means
    # that the flow is NOT stationary and S2 will be indexed by T+1.  If
    # is_stationary is true, S2 is indexed by 0, same as S1.
    is_stationary = 1

    gsize = g.ni  # size of grid along 1 direction. ASSUMING square grid.
    nrzns = num_rzns
    # nrzns itself for small test cases, capped at 1000 for large problems.
    bDimx = min(nrzns, 1000)
    r_outbound = g.r_outbound
    r_terminal = g.r_terminal
    i_term = g.endpos[0]  # terminal state indices
    j_term = g.endpos[1]
    dummyT = 0  # placeholder stored in params[7]

    # name of output pickle file containing transition prob in dictionary format
    if nT > 1:
        prefix = '3D_' + str(nT) + 'nT_a'
    else:
        prefix = '2D_a'
    filename = filename + prefix + str(n_actions)  # TODO: change filename
    base_path = join(ROOT_DIR, 'DP/Trans_matxs/')
    save_path = base_path + filename
    if exists(save_path):
        print("Folder Already Exists !!\n")
        return

    # Without at least one timestep the loop below never runs and there are
    # no COO / reward lists to convert; fail with a clear message instead.
    if nT < 1:
        raise ValueError(
            "setup_grid reported nT = {}; at least one timestep is required".format(nT))

    # Parameter vector copied to the device.  Layout (by index):
    # 0 gsize, 1 num_actions, 2 nrzns, 3 F, 4 dt, 5 r_outbound, 6 r_terminal,
    # 7 dummyT, 8 i_term, 9 j_term, 10 nT, 11 is_stationary
    params = np.array([
        gsize, num_actions, nrzns, F, dt, r_outbound, r_terminal, dummyT,
        i_term, j_term, nT, is_stationary
    ]).astype(np.float32)
    st_sp_size = (gsize**2) * nT  # size of total state space

    # cpu initialisations
    results = -1 * np.ones(((gsize**2) * nrzns), dtype=np.float32)
    sumR_sa = np.zeros(st_sp_size).astype(np.float32)
    Tdummy = np.zeros(2, dtype=np.float32)

    # informational initialisations
    # endpoint=False: the headings are spread over [0, 2*pi) so that the last
    # action does not coincide with the first one (2*pi is the same heading as 0).
    ac_angles = np.linspace(0, 2 * pi, num_actions, endpoint=False, dtype=np.float32)
    ac_angle = ac_angles[0].astype(np.float32)
    xs = xs.astype(np.float32)
    ys = ys.astype(np.float32)
    print("params: \n", params, "\n\n")

    t1 = time.time()

    # The same velocity realisations are used at every timestep, so the
    # float32 conversion is done once instead of once per loop iteration.
    vxrzns = Vx_rzns.astype(np.float32)
    vyrzns = Vy_rzns.astype(np.float32)
    # TODO: sanity check on dimensions: compare loaded matrix shape with gsize, numrzns

    # allocates memory on gpu.  The velocity buffers are sized for one
    # (nrzns, gsize, gsize) float32 field; they are allocated just once and
    # overwritten for each timestep.
    vel_nbytes = int(nrzns) * int(gsize) ** 2 * np.dtype(np.float32).itemsize
    vxrzns_gpu = cuda.mem_alloc(vel_nbytes)
    vyrzns_gpu = cuda.mem_alloc(vel_nbytes)
    # NOTE(review): ac_angle_gpu is filled below but not passed on anywhere in
    # this function - confirm whether it is still needed.
    ac_angle_gpu = cuda.mem_alloc(ac_angle.nbytes)
    xs_gpu = cuda.mem_alloc(xs.nbytes)
    ys_gpu = cuda.mem_alloc(ys.nbytes)
    params_gpu = cuda.mem_alloc(params.nbytes)
    T_gpu = cuda.mem_alloc(Tdummy.nbytes)

    # copies contents of the host arrays to allocated memory on gpu
    cuda.memcpy_htod(ac_angle_gpu, ac_angle)
    cuda.memcpy_htod(xs_gpu, xs)
    cuda.memcpy_htod(ys_gpu, ys)
    cuda.memcpy_htod(params_gpu, params)

    for T in range(nT):
        print("Computing data for timestep, T = ", T, '\n')
        Tdummy[0] = T

        # copy velocities and the current time index to gpu
        cuda.memcpy_htod(vxrzns_gpu, vxrzns)
        cuda.memcpy_htod(vyrzns_gpu, vyrzns)
        cuda.memcpy_htod(T_gpu, Tdummy)

        coo_list_a, Rs_list_a = build_sparse_transition_model_at_T(
            T, T_gpu, vxrzns_gpu, vyrzns_gpu, params, bDimx, params_gpu,
            xs_gpu, ys_gpu, ac_angles, results, sumR_sa,
            save_file_for_each_a=False)

        # full_coo_list_a and full_Rs_list_a are lists with each element
        # containing coo and R_s for an action of the same index.
        if T > 0:
            full_coo_list_a, full_Rs_list_a = concatenate_results_across_time(
                coo_list_a, Rs_list_a, full_coo_list_a, full_Rs_list_a)
            # TODO: finish concatenate...() function
        else:
            full_coo_list_a = coo_list_a
            full_Rs_list_a = Rs_list_a

    t2 = time.time()
    build_time = t2 - t1

    # build probability transition dictionary
    state_list = g.ac_state_space()
    init_transition_dict = initialise_dict(g)
    transition_dict = convert_COO_to_dict(init_transition_dict, g,
                                          full_coo_list_a, full_Rs_list_a)

    # save dictionary to file
    data = setup_params, setup_param_str, g.reward_structure, build_time
    write_files(transition_dict, filename, data)


# t1 = time.time()
# build_sparse_transition_model(filename = 'Highway_', n_actions = 16, nt = 1)
# t2 = time.time()
# print("Time for 1 timestep = ", t2 - t1, " secs")
def build_sparse_transition_model(filename='Transition_dict', n_actions=16, all_Yi=None, wanted_nrzns=5000, nt=None):
    """Build the time-dependent sparse transition model on the GPU and save it.

    Steps:
      1. Create the grid with ``setup_grid(num_actions=n_actions)``.
      2. Take the first four arrays of ``vel_field_data`` from the grid setup
         and use the caller-supplied ``all_Yi`` in place of the fifth one.
      3. Return early (after printing a message) if
         ``ROOT_DIR/DP/Trans_matxs_3D/<filename><prefix><n_actions>`` exists,
         where ``prefix`` is ``'3D_<nT>nT_a'`` for ``nT > 1`` and ``'2D_a'``
         otherwise.
      4. Copy all velocity data to the device once, then for every timestep
         ``T`` in ``range(nT)`` call ``build_sparse_transition_model_at_T``;
         results of later timesteps are merged with
         ``concatenate_results_across_time``.
      5. Convert the accumulated lists with ``convert_COO_to_dict``, save the
         dictionary with ``write_files`` and pickle the raw COO / reward lists
         with ``pickleFile``.

    Args:
        filename: Base name of the output; prefix and action count are appended.
        n_actions: Number of actions, forwarded to ``setup_grid``.
        all_Yi: Array with three dimensions (unpacked here as
            ``check_nt, check_nrzns, nmodes``).  Required despite the ``None``
            default.
        wanted_nrzns: Number of realisations used for the model.
        nt: Optional number of timesteps.  It replaces the value from
            ``setup_params`` only when it does not exceed that value.

    Side effects:
        Rebinds the module globals ``base_path`` and ``save_path`` on every
        call that gets past argument validation, and ``state_list`` when the
        model is built.  Prints progress / debug information.

    Returns:
        None.

    Raises:
        ValueError: if ``all_Yi`` is ``None`` or if the resulting number of
            timesteps is smaller than 1.
    """
    global state_list
    global base_path
    global save_path

    # all_Yi defaults to None only to keep the keyword signature; its shape
    # and contents are needed below, so fail early with a clear message.
    if all_Yi is None:
        raise ValueError("all_Yi must be provided (array of shape (nt, nrzns, nmodes))")

    print("Building Sparse Model")

    # setup grid
    print("input to build_sparse_trans_model:\n")
    print("n_actions", n_actions)
    g, xs, ys, X, Y, vel_field_data, nmodes, num_rzns, path_mat, setup_params, setup_param_str = setup_grid(
        num_actions=n_actions)
    print("xs: ", xs)
    print("ys", ys)

    # ------- IMPORTANT ----------
    # The fifth entry of vel_field_data is dropped on purpose: the all_Yi
    # argument is used as its replacement.
    all_u_mat, all_v_mat, all_ui_mat, all_vi_mat, _ = vel_field_data
    # nmodes from all_Yi overrides the value returned by setup_grid.
    # NOTE(review): check_nrzns is not compared against wanted_nrzns and
    # check_nt is only printed - confirm the kernel never indexes past them.
    check_nt, check_nrzns, nmodes = all_Yi.shape

    all_u_mat = all_u_mat.astype(np.float32)
    all_v_mat = all_v_mat.astype(np.float32)
    all_ui_mat = all_ui_mat.astype(np.float32)
    all_vi_mat = all_vi_mat.astype(np.float32)
    all_Yi = all_Yi.astype(np.float32)

    # setup_params = [num_actions, nt, dt, F, startpos, endpos]  (reference from setup_grid)
    num_actions = setup_params[0]
    nT = setup_params[1]  # total no. of time steps  TODO: check default value
    dt = setup_params[2]
    F = setup_params[3]
    print("****CHECK: ", nt, nT, check_nt)

    # If nt specified in the runner is within nT from the param file, use nt,
    # i.e. the model will be built for nt timesteps.
    if nt is not None and nt <= nT:
        nT = nt

    # 0 is false, any other number is true.  is_stationary = 0 (false) means
    # that the flow is NOT stationary and S2 will be indexed by T+1.  If
    # is_stationary is true, S2 is indexed by 0, same as S1.
    is_stationary = 0

    gsize = g.ni  # size of grid along 1 direction. ASSUMING square grid.
    nrzns = wanted_nrzns
    # nrzns itself for small test cases, capped at 1000 for large problems.
    bDimx = min(nrzns, 1000)
    r_outbound = g.r_outbound
    r_terminal = g.r_terminal
    i_term = g.endpos[0]  # terminal state indices
    j_term = g.endpos[1]

    # name of output pickle file containing transition prob in dictionary format
    if nT > 1:
        prefix = '3D_' + str(nT) + 'nT_a'
    else:
        prefix = '2D_a'
    filename = filename + prefix + str(n_actions)  # TODO: change filename
    base_path = join(ROOT_DIR, 'DP/Trans_matxs_3D/')
    save_path = base_path + filename
    if exists(save_path):
        print("Folder Already Exists !!\n")
        return

    # Without at least one timestep the loop below never runs and there are
    # no COO / reward lists to convert; fail with a clear message instead.
    if nT < 1:
        raise ValueError(
            "number of timesteps is {}; at least one timestep is required".format(nT))

    # Parameter vector copied to the device.  Layout (by index):
    # 0 gsize, 1 num_actions, 2 nrzns, 3 F, 4 dt, 5 r_outbound, 6 r_terminal,
    # 7 nmodes, 8 i_term, 9 j_term, 10 nT, 11 is_stationary, 12..31 filler z.
    # TODO: remove z from params. it is only for checks
    z = -9999
    params = np.array([
        gsize, num_actions, nrzns, F, dt, r_outbound, r_terminal, nmodes,
        i_term, j_term, nT, is_stationary,
        z, z, z, z, z,
        z, z, z, z, z,
        z, z, z, z, z,
        z, z, z, z, z
    ]).astype(np.float32)
    st_sp_size = (gsize**2)  # size of spatial state space
    print("check stsp_size", gsize, nT, st_sp_size)

    print("params")
    print("gsize ", params[0], "\n", "num_actions ", params[1], "\n",
          "nrzns ", params[2], "\n", "F ", params[3], "\n", "dt ", params[4],
          "\n", "r_outbound ", params[5], "\n", "r_terminal ", params[6],
          "\n", "nmodes ", params[7], "\n", "i_term ", params[8], "\n",
          "j_term ", params[9], "\n", "nT", params[10], "\n",
          "is_stationary ", params[11], "")

    # cpu initialisations
    results = -1 * np.ones(((gsize**2) * nrzns), dtype=np.float32)
    sumR_sa = np.zeros(st_sp_size).astype(np.float32)
    Tdummy = np.zeros(2, dtype=np.float32)

    # informational initialisations
    ac_angles = np.linspace(0, 2 * pi, num_actions, endpoint=False, dtype=np.float32)
    print("action angles:\n", ac_angles)
    ac_angle = ac_angles[0].astype(np.float32)  # just for allocating memory
    xs = xs.astype(np.float32)
    ys = ys.astype(np.float32)
    print("params: \n", params, "\n\n")

    t1 = time.time()

    # allocates memory on gpu; the velocity data is allocated and copied just
    # once, before the loop over timesteps.
    all_u_mat_gpu = cuda.mem_alloc(all_u_mat.nbytes)
    all_v_mat_gpu = cuda.mem_alloc(all_v_mat.nbytes)
    all_ui_mat_gpu = cuda.mem_alloc(all_ui_mat.nbytes)
    all_vi_mat_gpu = cuda.mem_alloc(all_vi_mat.nbytes)
    all_Yi_gpu = cuda.mem_alloc(all_Yi.nbytes)
    vel_data_gpu = [
        all_u_mat_gpu, all_v_mat_gpu, all_ui_mat_gpu, all_vi_mat_gpu,
        all_Yi_gpu
    ]
    # NOTE(review): ac_angle_gpu is filled below but not passed on anywhere in
    # this function - confirm whether it is still needed.
    ac_angle_gpu = cuda.mem_alloc(ac_angle.nbytes)
    xs_gpu = cuda.mem_alloc(xs.nbytes)
    ys_gpu = cuda.mem_alloc(ys.nbytes)
    params_gpu = cuda.mem_alloc(params.nbytes)
    T_gpu = cuda.mem_alloc(Tdummy.nbytes)

    # copies contents of the host arrays to allocated memory on gpu
    cuda.memcpy_htod(all_u_mat_gpu, all_u_mat)
    cuda.memcpy_htod(all_v_mat_gpu, all_v_mat)
    cuda.memcpy_htod(all_ui_mat_gpu, all_ui_mat)
    cuda.memcpy_htod(all_vi_mat_gpu, all_vi_mat)
    cuda.memcpy_htod(all_Yi_gpu, all_Yi)
    cuda.memcpy_htod(ac_angle_gpu, ac_angle)
    cuda.memcpy_htod(xs_gpu, xs)
    cuda.memcpy_htod(ys_gpu, ys)
    cuda.memcpy_htod(params_gpu, params)

    for T in range(nT):
        print("*** Computing data for timestep, T = ", T, '\n')
        Tdummy[0] = T
        # TODO: sanity check on dimensions: compare loaded matrix shape with gsize, numrzns

        # only the current time index changes between iterations
        cuda.memcpy_htod(T_gpu, Tdummy)

        print("pre func")
        coo_list_a, Rs_list_a = build_sparse_transition_model_at_T(
            T, T_gpu, vel_data_gpu, params, bDimx, params_gpu, xs_gpu, ys_gpu,
            ac_angles, results, sumR_sa, save_file_for_each_a=False)
        print("post func")

        # full_coo_list_a and full_Rs_list_a are lists with each element
        # containing coo and R_s for an action of the same index.
        if T > 0:
            full_coo_list_a, full_Rs_list_a = concatenate_results_across_time(
                coo_list_a, Rs_list_a, full_coo_list_a, full_Rs_list_a)
            # TODO: finish concatenate...() function
        else:
            full_coo_list_a = coo_list_a
            full_Rs_list_a = Rs_list_a

    t2 = time.time()
    build_time = t2 - t1
    print("build_time ", build_time)

    # build probability transition dictionary
    state_list = g.ac_state_space()
    init_transition_dict = initialise_dict(g)
    transition_dict = convert_COO_to_dict(init_transition_dict, g,
                                          full_coo_list_a, full_Rs_list_a)
    print("conversion COO to dict done")

    # save dictionary to file, then the raw sparse lists next to it
    data = setup_params, setup_param_str, g.reward_structure, build_time
    write_files(transition_dict, filename, data)
    pickleFile(full_coo_list_a, save_path + '/' + filename + '_COO')
    pickleFile(full_Rs_list_a, save_path + '/' + filename + '_Rsa')
# Scratch check: build the grid with setup_grid's default arguments and print
# a few of the returned values for inspection.
from utils.setup_grid import setup_grid

g, xs, ys, X, Y, vel_field_data, nmodes, nrzns, paths, params, param_str = setup_grid(
)
# NOTE(review): X and Y are presumably the grid coordinate arrays - confirm
# against setup_grid.
print(X)
print(Y)
# Shape of the entry stored at index [0, 0] of paths.
print(paths[0, 0].shape)