def convert_COO_to_dict(tdict, g, coo_file, Rsa_file):
    """
    Fill a nested transition dictionary from pickled COO and reward files.

    Both files are unpickled with ``read_pickled_File`` and are indexed per
    action, in the same order as ``g.actions``. For every column ``k`` of an
    action's COO array the entry

        tdict[S1][a][S2] = (prob, r)

    is written, where ``S1``/``S2`` are obtained from the state ids stored in
    rows 0 and 1 via ``get_S_from_S_id(..., g.ni)``, ``prob`` is row 2 and
    ``r`` is the reward looked up in the action's reward array at the source
    state id.

    :param tdict: pre-initialised nested dict ``tdict[S1][a]`` that is updated
        in place and also returned
    :param g: grid object; only ``g.actions`` and ``g.ni`` are read here
    :param coo_file: path of the pickled per-action list of COO arrays
    :param Rsa_file: path of the pickled per-action list of reward arrays
    :return: the same ``tdict`` object, updated

    TODO: add another loop for T for 3d case
    """
    coo_list = read_pickled_File(coo_file)
    Rs_list = read_pickled_File(Rsa_file)

    for i, a in enumerate(g.actions):
        coo = coo_list[i]
        Rs = Rs_list[i]
        # coo is expected to be 2-D; only the number of columns is needed.
        _, n = coo.shape

        for k in range(n):
            S1 = get_S_from_S_id(coo[0, k], g.ni)
            S2 = get_S_from_S_id(coo[1, k], g.ni)
            prob = coo[2, k]
            r = Rs[int(coo[0, k])]

            # Only a missing S1 / action key means "not actionable". Any other
            # failure is a real error and must not be hidden behind this
            # message (the original used a bare ``except``).
            try:
                tdict[S1][a][S2] = (prob, r)
            except KeyError:
                print(S1, 'is not an actionable state')

    return tdict
def run_onboard_routing_for_test_data(exp_num_case_dir, setup_grid_params, opfname):
    """
    Run the onboard-routing (phase 2) evaluation for a saved experiment and
    print it next to the phase 1 (saved policy) results.

    Pickled inputs are loaded from ``exp_num_case_dir``: 'Q2', 'N2',
    'Policy_02', 'test_id_list', 'train_id_list' and 'output_paramaters'.
    The module globals ALPHA, N_inc, eps_0, train_id_list and test_id_list are
    overwritten. The module-level names ``n_test_paths_range`` and
    ``run_number`` are read and must already exist.

    :param exp_num_case_dir: directory holding the pickled experiment outputs;
        results are also written there
    :param setup_grid_params: 11-element sequence; unpacked here and also
        passed on unchanged to run_and_plot_onboard_routing_episodes
    :param opfname: output file name (joined onto exp_num_case_dir) used for
        the phase 2 summary
    :return: None
    """
    global ALPHA, N_inc, train_id_list, test_id_list, sars_traj_list, eps_0

    def _load(name):
        # Every input lives directly inside the experiment directory.
        return read_pickled_File(join(exp_num_case_dir, name))

    q_table = _load('Q2')
    visit_counts = _load('N2')
    saved_policy = _load('Policy_02')
    test_id_list = _load('test_id_list')
    train_id_list = _load('train_id_list')
    # sars_traj_list = read_pickled_File(join(exp_num_case_dir, 'sars_traj_Train_Trajectories_after_exp'))
    training_params = _load('output_paramaters')

    print("*********** 1 **************\n")
    ALPHA = training_params[9]
    N_inc = training_params[11]
    eps_0 = 0.05

    print("ALPHA, N_inc = ", ALPHA, N_inc)
    # print('len(sars_traj_list) = ', len(sars_traj_list))
    print("len(train_id_list)= ", len(train_id_list))
    print("len(test_id_list)= ", len(test_id_list))
    print('n_test_paths_range = ', n_test_paths_range)

    # ---- phase 2: onboard routing episodes
    episode_times, _bad_count = run_and_plot_onboard_routing_episodes(
        setup_grid_params, q_table, visit_counts, exp_num_case_dir, opfname)
    print("*********** 2 **************\n")

    phase2_results = calc_mean_and_std(episode_times)
    summary_path = join(exp_num_case_dir, opfname)
    picklePolicy(phase2_results, summary_path)
    writePolicytoFile(phase2_results, join(exp_num_case_dir, opfname))
    avg_time_ph2, std_time_ph2, cnt_ph2, none_cnt_ph2 = phase2_results

    first, last = n_test_paths_range[0], n_test_paths_range[1]
    print("test_id_list[range] = ", test_id_list[first:last])
    print("----- phase 2 data ---------")
    print("avg_time_ph2", avg_time_ph2, '\n',
          "std_time_ph2", std_time_ph2, '\n',
          "cnt_ph2", cnt_ph2, '\n',
          "none_cnt_ph2", none_cnt_ph2)

    # ---- phase 1: same test realisations, saved policy (for comparison)
    g, xs, ys, X, Y, vel_field_data, nmodes, _, _, _, _ = setup_grid_params
    phase1_fname = 'Explicit_Phase_1_test_' + str(run_number)
    p1_times, _p1_returns, _p1_bad_count = plot_and_return_exact_trajectory_set_train_data(
        g, saved_policy, X, Y, vel_field_data, nmodes, test_id_list,
        n_test_paths_range, exp_num_case_dir, fname=phase1_fname)

    phase1_results = calc_mean_and_std(p1_times)
    avg_time_ph1, std_time_ph1, cnt_ph1, none_cnt_ph1 = phase1_results
    print("----- phase 1 data ---------")
    print("avg_time_ph1", avg_time_ph1, '\n',
          "std_time_ph1", std_time_ph1, '\n',
          "cnt_ph1", cnt_ph1, '\n',
          "none_cnt_ph1", none_cnt_ph1)

    return
from utils.plot_functions import plot_exact_trajectory_set, plot_learned_policy, plot_exact_trajectory_set_DP
from definition import ROOT_DIR
import math
from utils.setup_grid import setup_grid
from os.path import join

# Script: re-evaluate a previously saved DP policy on a fixed range of test
# realisations, plot the trajectories and the policy, and compute summary
# statistics.
# NOTE(review): read_pickled_File and calc_mean_and_std are used below but are
# not imported in the visible part of the file - presumably imported earlier.

# setup_grid returns an 11-element tuple; only some of the names are used here.
g, xs, ys, X, Y, vel_field_data, nmodes, useful_num_rzns, paths, params, param_str = setup_grid(num_actions=16)

# Experiment directory (relative to ROOT_DIR); outputs go to the same place.
# rel_path = 'Experiments/106/DP'
rel_path = 'Experiments/26/DP'
exp_num_case_dir = join(ROOT_DIR, rel_path)
output_path = exp_num_case_dir

# Ids 2500..2549 are passed on as the test ids.
test_id_list = [i for i in range(2500,2550)]

# The policy was pickled under '<experiment dir>/policy'.
policy = read_pickled_File(exp_num_case_dir + '/policy')

t_list_all, t_list_reached, G_list, bad_count_tuple= plot_exact_trajectory_set_DP(g, policy, X, Y, vel_field_data, test_id_list, output_path, fname='Test_Traj_set' + '_')
# Only the first element of the returned tuple is kept.
badcount = bad_count_tuple[0]

# Plot Policy
print("plot policy")
plot_learned_policy(g, DP_params = [policy, output_path], vel_field_data=vel_field_data)

# Writing the lists to disk is currently disabled; only the message is printed.
print("write list to file")
# write_list_to_file(t_list_all, output_path+'/t_list_all')
# write_list_to_file(G_list, output_path +'/G_list')

# calc_mean_and_std returns a 4-tuple; the names used for its elements here are
# (mean, std, cnt, none_cnt).
print("calc mean and std")
mean_tlist,_std_tlist, cnt, none_cnt = calc_mean_and_std(t_list_all)
mean_glist, _, _, _ = calc_mean_and_std(G_list)
def plot_paths_colored_by_EAT(plotFile=None, baseFile=None, savePath_fname=None):
    """
    Plot a pickled set of trajectories, coloured by their time value.

    Each entry of the pickled list is either ``None`` or a pair-like object
    whose element 0 and element 1 are plotted against each other; the time of
    a trajectory is ``len(entry[0])``.

    * Without ``baseFile`` every non-None trajectory is coloured by its own
      time.
    * With ``baseFile`` the colour is the time difference
      ``len(plot[i][0]) - len(base[i][0])``; only indices where BOTH
      trajectories are present have a difference and only those are drawn.

    Fixes relative to the previous version:
    * colours are looked up by trajectory index, not by position in the
      compacted ``time_list`` (which went out of step, or raised IndexError,
      as soon as one entry was None);
    * ``ax.grid`` no longer uses the removed ``b=`` keyword and the colourbar
      is given an explicit ``ax``;
    * an empty time list no longer crashes ``np.min``; an empty axes is
      shown instead;
    * the data are loaded before the figure is created, so the early return
      on a length mismatch does not leave an open figure behind.

    :param plotFile: path of the pickled trajectory list to plot
    :param baseFile: optional path of a pickled baseline trajectory list of
        the same length
    :param savePath_fname: optional path; when given the figure is saved there
    :return: list of times (or time differences) in trajectory order, or None
        when the two lists have different lengths
    """
    msize = 15  # only used by the disabled start/end marker code below
    fsize = 3

    # ---------------------------- load data ---------------------------
    plot_set = read_pickled_File(plotFile)
    n_paths = len(plot_set)

    # index of trajectory -> time (or time difference). Keeping the index is
    # what keeps colours aligned with trajectories when some entries are None.
    time_by_index = {}
    if baseFile is not None:
        # comparison plot: colourbar shows EAT time differences
        base_traj_set = read_pickled_File(baseFile)
        # sanity check
        if n_paths != len(base_traj_set):
            print(
                "ERROR: Unfair Comparison. Two lists should have data across same number of realisations"
            )
            return
        for i in range(n_paths):
            if plot_set[i] is not None and base_traj_set[i] is not None:
                time_by_index[i] = len(plot_set[i][0]) - len(base_traj_set[i][0])
    else:
        for i in range(n_paths):
            if plot_set[i] is not None:
                time_by_index[i] = len(plot_set[i][0])

    # dicts preserve insertion order, so this is in trajectory order
    time_list = list(time_by_index.values())

    # ---------------------------- beautify plot ---------------------------
    fig = plt.figure(figsize=(fsize, fsize))
    ax = fig.add_subplot(1, 1, 1)
    ax.set_xlim(0, 100)
    ax.set_ylim(0, 100)

    # set grid
    minor_ticks = [i for i in range(101) if i % 20 != 0]
    major_ticks = list(range(0, 120, 20))
    ax.set_xticks(minor_ticks, minor=True)
    ax.set_xticks(major_ticks, minor=False)
    ax.set_yticks(major_ticks, minor=False)
    ax.set_yticks(minor_ticks, minor=True)
    # First argument passed positionally: the keyword was renamed from ``b``
    # to ``visible`` and positional use works with both old and new versions.
    ax.grid(True, which='both', color='#CCCCCC', axis='both', linestyle='-', alpha=0.5)
    ax.tick_params(axis='both', which='both', labelsize=6)
    ax.set_xlabel('X (Non-Dim)')
    ax.set_ylabel('Y (Non-Dim)')

    # st_point= g.start_state
    # plt.scatter(g.xs[st_point[1]], g.ys[g.ni - 1 - st_point[0]], marker = 'o', s = msize, color = 'k', zorder = 1e5)
    # plt.scatter(g.xs[g.endpos[1]], g.ys[g.ni - 1 - g.endpos[0]], marker = '*', s = msize*2, color ='k', zorder = 1e5)
    ax.set_aspect('equal', adjustable='box')

    # ---------------------------- main plot ---------------------------
    if time_list:
        cNorm = colors.Normalize(vmin=np.min(time_list), vmax=np.max(time_list))
        scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=plt.get_cmap('jet'))
        scalarMap.set_array([])

        for i, t in time_by_index.items():
            ax.plot(plot_set[i][0], plot_set[i][1],
                    color=scalarMap.to_rgba(t), alpha=0.6)

        # The mappable is not attached to any artist, so the axes the
        # colourbar belongs to has to be named explicitly.
        plt.colorbar(scalarMap, ax=ax)

    if savePath_fname is not None:
        plt.savefig(savePath_fname, bbox_inches="tight", dpi=300)
    plt.show()

    return time_list
def run_DP(setup_grid_params, prob_file, output_file, output_path, threshold=1e-3, eg_rzn=1):
    """
    Run value iteration until convergence, extract and pickle the policy, then
    evaluate it on a fixed set of test ids.

    Steps, in order: unpack the grid set-up, load the pickled transition
    probabilities from ROOT_DIR/DP/Trans_matxs_3D/<prob_file>/<prob_file>,
    repeat ``value_iteration_update`` until the reported maximum change in V
    is below ``threshold``, compute the policy and pickle it to
    ``output_path + '/policy'``, plot trajectories for ids 4500..4999, plot
    the policy, write the time and return lists to file and compute their
    means.

    Side effect: the module global ``action_state_space`` is overwritten.

    :param setup_grid_params: 11-element sequence as unpacked below
    :param prob_file: name of the transition-probability directory and file
    :param output_file: suffix appended to 'Test_Traj_set' for the plot name
    :param output_path: directory that receives all outputs
    :param threshold: convergence threshold on the maximum change in V
    :param eg_rzn: unused by the active code (only referenced in the disabled
        single-trajectory plot)
    :return: (V[g.start_state], mean of t_list_all, np.std(t_list_all),
        badcount, DP compute time in seconds, mean of G_list)
    """
    # TODO: use appropriate plot funtions that utilise vel_field_data instead of vxrzns and vyrzns
    global action_state_space

    # Set up grid
    (g, xs, ys, X, Y, vel_field_data, nmodes, num_rzns, path_mat, params,
     param_str) = setup_grid_params
    print("g.nt: ", g.nt)
    action_state_space = g.ac_state_space()

    # Read transition probability
    trans_prob_path = ROOT_DIR + '/DP/Trans_matxs_3D/' + prob_file + '/' + prob_file
    Trans_prob = read_pickled_File(trans_prob_path)

    # Initialise policy and V
    policy, V = initialise_policy_and_V(g)

    print("Parameters:\n", "xs: ", xs, '\n', "ys: ", ys, '\n', "nmodes: ",
          nmodes, '\n', "num_rzns: ", num_rzns, '\n')
    for idx, value in enumerate(params):
        print(param_str[idx], ": ", value)

    t_begin = time.time()
    report_every = 1  # progress is reported on every iteration
    n_iters = 0

    # Value-iteration sweeps until the largest change in V drops below threshold
    while True:
        n_iters += 1
        t_iter_begin = time.time()
        V, del_V_max, flagged_state = value_iteration_update(g, V, Trans_prob)

        if n_iters % report_every == 0:
            print("--- iter: ", n_iters, '\n', "del_V_max: ", del_V_max, '\n',
                  "flagged_state: ", flagged_state, '\n', "cumul_time: ",
                  (time.time() - t_begin) / 60, ' mins\n', "iter_time: ",
                  (time.time() - t_iter_begin) / 60, ' mins')

        if del_V_max < threshold:
            print("Converged after ", n_iters, " iterations")
            break

        # NOTE(review): the source's indentation was lost; this blank-line
        # print is assumed to belong to the loop body - confirm.
        print()

    # Compute policy
    print("launch compute policy after convergence")
    policy = compute_Policy(g, policy, Trans_prob, V)
    DP_compute_time = time.time() - t_begin

    # Save policy to file
    print("pickle policy")
    picklePolicy(policy, output_path + '/policy')

    # TODO: make corrections to plot_exact_trajcetory() before re-enabling the
    # single sample-trajectory plot for realisation eg_rzn.

    print("plot exct trajectory set")
    test_id_list = list(range(4500, 5000))
    traj_set_name = 'Test_Traj_set' + output_file
    t_list_all, _t_list_reached, G_list, bad_count_tuple = plot_exact_trajectory_set_DP(
        g, policy, X, Y, vel_field_data, test_id_list, output_path,
        fname=traj_set_name)
    badcount = bad_count_tuple[0]
    print("t_list_all ", t_list_all)

    # Plot Policy
    print("plot policy")
    plot_learned_policy(g, DP_params=[policy, output_path],
                        vel_field_data=vel_field_data)

    print("write list to file")
    write_list_to_file(t_list_all, output_path + '/t_list_all')
    write_list_to_file(G_list, output_path + '/G_list')

    print("calc mean and std")
    mean_tlist, _unused_std, _, _ = calc_mean_and_std(t_list_all)
    mean_glist, _, _, _ = calc_mean_and_std(G_list)

    # The spread is taken with np.std over the raw list, not from
    # calc_mean_and_std, exactly as before.
    std_tlist = np.std(t_list_all)
    return V[g.start_state], mean_tlist, std_tlist, badcount, DP_compute_time, mean_glist
# NOTE(review): the statements down to the first `return` are the tail of a
# function whose `def` line lies outside this view; they are kept unchanged and
# are assumed to sit one indentation level deep - confirm against the full file.
    # Save the figure, then clear and close it.
    plt.savefig(join(fpath,fname),bbox_inches = "tight", dpi=200)
    plt.cla()
    plt.close(fig)
    # Pickle traj_list next to the figure as '<fname>coord_traj'.
    print("*** pickling traj_list ***")
    picklePolicy(traj_list, join(fpath,fname + 'coord_traj'))
    # picklePolicy(sars_traj_list,join(fpath,'sars_traj_'+fname) )
    print("*** pickled ***")
    return t_list, G_list, bad_count


# ---- top-level script: evaluate the saved DP policy on test ids 4000..4999
g, xs, ys, X, Y, vel_field_data, nmodes, useful_num_rzns, paths, params, param_str = setup_grid(num_actions=16)
rel_path = 'Experiments/26/DP'
exp_num_case_dir = join(ROOT_DIR, rel_path)
policy = read_pickled_File(join(exp_num_case_dir, 'policy'))
# tlist_file = join(exp_num_case_dir, 'TrajTimes2.txt')
# with open(tlist_file, 'r') as f:
#     phase1_tlist = ast.literal_eval(f.read())
# test_id_rel_path ='Experiments/104/QL/num_passes_50/QL_Iter_x1/dt_size_2500/ALPHA_0.05/eps_0_0.1'
# test_id_list = read_pickled_File(join(test_id_rel_path, 'test_id_list'))
test_id_list = [i for i in range(4000,5000)]
# A `global` statement at module level has no effect; the name is simply a
# module-level variable.
global n_test_paths_range
# The range covers the whole of test_id_list.
n_test_paths_range = [0, len(test_id_list)]
t_list, G_list, bad_count = plot_and_return_exact_trajectory_set_train_data(g, policy, X, Y, vel_field_data, nmodes, test_id_list, n_test_paths_range, exp_num_case_dir, fname='Explicit_plot_DPpolicy_test_4k_5k')
# calc_mean_and_std is unpacked into four values elsewhere in this source; here
# the result is kept as one object.
phase1_results = calc_mean_and_std(t_list)
This file generates all_Yi_mat with modes corresponding to the realisations in
train_id_list (the list of rzns used for training). The output is further used
to construct a transition model for DP based on the given realisations.
"""
from utils.custom_functions import read_pickled_File, get_rzn_ids_for_training_and_testing
from utils.setup_grid import setup_grid
from definition import ROOT_DIR
from os.path import join
import numpy as np

# Get train_id_list (pickled in the experiment directory below)
rel_path = 'Experiments/104/QL/num_passes_50/QL_Iter_x1/dt_size_2500/ALPHA_0.05/eps_0_0.1'
exp_num_case_dir = join(ROOT_DIR, rel_path)
train_id_list = read_pickled_File(join(exp_num_case_dir, 'train_id_list'))
train_size = len(train_id_list)
print("len(train_id_list)= ", train_size)

# get all_Yi_mat of required nT
Yi_mat5k_path = join(ROOT_DIR, 'Input_data_files/nT_60/all_Yi.npy')
all_Yi_mat_5kmodes = np.load(Yi_mat5k_path)
# The array is 3-D; its axes are named (nT, original_nrzns, nmodes) here.
nT, original_nrzns, nmodes = all_Yi_mat_5kmodes.shape
# Hard-coded expectation for the input file: shape (60, 5000, 5).
assert (all_Yi_mat_5kmodes.shape == (60, 5000, 5)), "Shape Misamatch: CCheck"

# make all_Yi_mat of wanted size:
# Target_all_Yi_mat = np.zeros((nT, train_size, nmodes))
# Select, along axis 1, the entries whose indices are listed in train_id_list;
# the other two axes are kept whole.
Target_all_Yi_mat = all_Yi_mat_5kmodes[:, train_id_list, :]
print(Target_all_Yi_mat.shape)

#checks
print("*** pickling traj_list ***") # picklePolicy(traj_list, join(fpath,fname + 'coord_traj')) # picklePolicy(sars_traj_list,join(fpath,'sars_traj_'+fname) ) print("*** pickled ***") return g, xs, ys, X, Y, vel_field_data, nmodes, useful_num_rzns, paths, params, param_str = setup_grid( num_actions=16) # rel_path = 'Experiments/153/QL/num_passes_50/QL_Iter_x1/dt_size_5000/ALPHA_0.05/eps_0_0.1/' rel_path = 'Experiments/26/DP' exp_num_case_dir = join(ROOT_DIR, rel_path) coord_data = read_pickled_File( join(exp_num_case_dir, 'Explicit_plot_DPpolicy_test_4k_5kcoord_traj')) # coord_data = read_pickled_File(join(exp_num_case_dir, 'Test_Trajectories_after_exp')) # coord_data = read_pickled_File(join(exp_num_case_dir, 'Trajectories_before_exp')) # print(coord_data) print(len(coord_data)) pretty_plot(g, coord_data, exp_num_case_dir, 'color_4k_5k-no_cb') # summ = 0 # cnt = 0 # print(len(phase1_tlist)) # print(test_id_list[:n_test_paths]) # for i in range(n_test_paths): # rzn = test_id_list[i] # t = phase1_tlist[rzn] # print(t)