def run_Experiment(DP=None, QL=None):
    """
    Runs the experiment using DP, QL, or both.
    Creates a new experiment directory automatically and saves a result summary
    to the summary file.
    :param DP: [prob_file (just the file name, not the path), output_path]
    :param QL: list of QL hyperparameters; the ordering must match the unpacking in run_QL
    :return:
    """
    # Path information
    output_path, exp_num = create_new_dir()   # dirs Exp/1, Exp/2, ...
    DP_path = join(output_path, 'DP')         # dir  Exp/<exp_num>/DP
    QL_path = join(output_path, 'QL')         # dir  Exp/<exp_num>/QL

    print("************ Exp ", exp_num, "************ \n")

    # Exp_summary_data
    method = get_method_str(DP, QL)
    exp_summary = [str(exp_num), method]

    # Run DP
    if DP is not None:
        print("In Runner: Executing DP !!")
        prob_file = DP[0]
        createFolder(DP_path)
        # output_params = [V_so, mean, variance, bad_count]
        # setup_grid_params, output_file and threshold are module-level variables here.
        output_params = run_DP(setup_grid_params, prob_file, output_file, DP_path,
                               threshold=threshold)
        # CHANGE ARGUMENT if the return order of setup_grid() is changed.
        input_params = setup_grid_params[9].copy()
        input_params.append(prob_file)
        exp_summary = append_params_to_summary(exp_summary, input_params, output_params)
        append_summary_to_summaryFile('Experiments/Exp_summary.csv', exp_summary)
        print("In Runner: Executing DP Finished!!")

    # Run QL
    if QL is not None:
        print("In Runner: Executing QL !!")
        QL_params = QL
        createFolder(QL_path)
        output_parameters_all_cases = run_QL(setup_grid_params, QL_params, QL_path, exp_num)
        print("In Runner: Executing QL Finished !!")
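# Hedged usage sketch (illustration only, never called): how the DP branch of run_Experiment
# might be invoked. 'prob_dict_example' is an assumed file name; the QL argument is left as
# None here - a matching QL list is sketched after run_QL below.
def _example_run_experiment_DP_only():
    DP_args = ['prob_dict_example', None]   # [prob_file (name only, not path), output_path]
    run_Experiment(DP=DP_args, QL=None)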
def write_files(transition_dict, filename, data):
    """
    Pickles the dictionary containing model details and writes the parameters
    to a parameter file and to the model summary file.
    :param transition_dict: dictionary of transition probabilities to pickle
    :param filename: base name used for the pickle and parameter files
    :param data: (params, param_str, reward_structure, build_time)
    :return:
    """
    summary_file = base_path + 'model_summary.csv'
    params, param_str, reward_structure, build_time = data
    createFolder(save_path)

    # Save transition probabilities by pickling the dictionary.
    with open(save_path + '/' + filename + '.p', 'wb') as fp:
        pickle.dump(transition_dict, fp, protocol=pickle.HIGHEST_PROTOCOL)

    # Write parameters, reward structure and build time to a text file.
    with open(save_path + '/' + filename + '_params.txt', 'w') as f:
        for i in range(len(param_str)):
            f.write(param_str[i] + ':' + ' ' + str(params[i]) + "\n")
        f.write("Reward Structure: " + str(reward_structure) + "\n")
        f.write("Build Time: " + str(build_time))
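# Hedged usage sketch (illustration only): how write_files expects its 'data' tuple to be
# packed. The parameter names/values and the reward numbers below are assumptions; only the
# (params, param_str, reward_structure, build_time) ordering comes from write_files itself.
def _example_write_files(transition_dict):
    param_str = ['num_actions', 'nt', 'dt', 'F']
    params = [16, 100, 1.0, 1.0]
    reward_structure = {'terminal': 10, 'per_step': -1}   # assumed structure
    build_time = 42.0                                      # seconds, assumed
    data = (params, param_str, reward_structure, build_time)
    write_files(transition_dict, 'transition_dict_example', data)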
def run_QL(setup_grid_params, QL_params, QL_path):
    exp = QL_path

    # Parameters (example values):
    # Training_traj_size_list = [1000, 2000, 3000, 4000, 5000]
    # ALPHA_list = [0.2, 0.35, 0.5, 0.75]
    # esp0_list = [0.25, 0.5, 0.75]
    # with_guidance = True
    # QL_Iters = int(1000)
    # init_Q = -1000000
    # stream_speed = 0.2
    (Training_traj_size_list, ALPHA_list, esp0_list, QL_Iters, init_Q,
     with_guidance, method, num_passes) = QL_params

    # Read data from files
    g, xs, ys, X, Y, Vx_rzns, Vy_rzns, num_rzns, paths, params, param_str = setup_grid_params
    print("In TQLearn: ", len(params), params)
    num_actions, nt, dt, F, startpos, endpos = params

    total_cases = len(Training_traj_size_list) * len(ALPHA_list) * len(esp0_list)

    # Print QL parameters to file
    str_Params = ['with_guidance', 'Training_traj_size_list', 'ALPHA_list',
                  'esp0_list', 'QL_Iters', 'num_actions', 'init_Q', 'dt', 'F']
    Params = [with_guidance, Training_traj_size_list, ALPHA_list,
              esp0_list, QL_Iters, num_actions, init_Q, dt, F]
    Param_filename = exp + '/Params.txt'
    outputfile = open(Param_filename, 'w+')
    for i in range(len(Params)):
        print(str_Params[i] + ': ', Params[i], file=outputfile)
    outputfile.close()

    # Create sub-directories for the different hyperparameter combinations
    for eps_0 in esp0_list:
        for ALPHA in ALPHA_list:
            for dt_size in Training_traj_size_list:
                directory = exp + '/dt_size_' + str(dt_size) + '/ALPHA_' + str(ALPHA) \
                            + '/eps_0_' + str(eps_0)
                createFolder(directory)

    case = 0
    start = time.time()
    for eps_0 in esp0_list:
        for ALPHA in ALPHA_list:
            for dt_size in Training_traj_size_list:
                dir_path = exp + '/dt_size_' + str(dt_size) + '/ALPHA_' + str(ALPHA) \
                           + '/eps_0_' + str(eps_0) + '/'
                case += 1
                print("******* CASE: ", case, '/', total_cases, '*******')
                print("with_guidance= ", with_guidance)
                print('eps_0 = ', eps_0)
                print('ALPHA =', ALPHA)
                print('dt_size = ', dt_size)

                # Reset variables and environment
                if with_guidance:
                    Q, N = initialise_guided_Q_N(g, init_Q, init_Q / 2, 1)
                else:
                    Q, N = initialise_Q_N(g, init_Q, 1)
                g.set_state(g.start_state)

                # Learn policy from trajectory data
                if dt_size != 0:
                    Q, policy, max_delQ_list_1 = Learn_policy_from_data(
                        paths, g, Q, N, Vx_rzns, Vy_rzns,
                        num_of_paths=dt_size, num_actions=num_actions,
                        ALPHA=ALPHA, method=method, num_passes=num_passes)
                    plot_max_Qvalues(Q, policy, X, Y)

                    # Save policy
                    Policy_path = dir_path + 'Policy_01'

                    # Plot policy
                    label_data = [F, ALPHA, init_Q, QL_Iters]
                    QL_params_plot = policy, Q, init_Q, label_data, dir_path
                    plot_learned_policy(g, QL_params=QL_params_plot)
                    # plot_all_policies(g, Q, policy, init_Q, label_data, full_file_path=Fig_policy_path)

                    writePolicytoFile(policy, Policy_path)
                    plot_max_delQs(max_delQ_list_1, filename=dir_path + 'delQplot1')
                else:
                    if with_guidance:
                        policy = initialise_policy_from_initQ(Q)
                    else:
                        policy = initialise_policy(g)

                # Times and trajectories based on data and/or guidance
                t_list1, G0_list1, bad_count1 = plot_exact_trajectory_set(
                    g, policy, X, Y, Vx_rzns, Vy_rzns, exp,
                    fname=dir_path + 'Trajectories_before_exp')

                # Learn from experience
                Q, policy, max_delQ_list_2 = Q_learning_Iters(
                    Q, N, g, policy, Vx_rzns, Vy_rzns,
                    alpha=ALPHA, QIters=QL_Iters, eps_0=eps_0)

                # Save updated policy
                Policy_path = dir_path + 'Policy_02'
                writePolicytoFile(policy, Policy_path)

                # Plots after experience
                plot_max_delQs(max_delQ_list_2, filename=dir_path + 'delQplot2')
                t_list2, G0_list2, bad_count2 = plot_exact_trajectory_set(
                    g, policy, X, Y, Vx_rzns, Vy_rzns, exp,
                    fname=dir_path + 'Trajectories_after_exp')

                # Results to be printed
                avg_time1, std_time1, _, _ = calc_mean_and_std(t_list1)
                avg_G01, _, _, _ = calc_mean_and_std(G0_list1)
                avg_time2, std_time2, _, _ = calc_mean_and_std(t_list2)
                avg_G02, _, _, _ = calc_mean_and_std(G0_list2)

                if QL_Iters != 0:
                    bad_count1 = (bad_count1, str(bad_count1 * 100 / dt_size) + '%')
                    bad_count2 = (bad_count2, str(bad_count2 * 100 / dt_size) + '%')

                # Print results to file
                str_Results1 = ['avg_time1', 'std_time1', 'bad_count1', 'avg_G01']
                Results1 = [avg_time1, std_time1, bad_count1, avg_G01]
                str_Results2 = ['avg_time2', 'std_time2', 'bad_count2', 'avg_G02']
                Results2 = [avg_time2, std_time2, bad_count2, avg_G02]

                Result_filename = dir_path + 'Results.txt'
                outputfile = open(Result_filename, 'w+')
                print("Before Experience ", file=outputfile)
                for i in range(len(Results1)):
                    print(str_Results1[i] + ': ', Results1[i], file=outputfile)
                print(end="\n" * 3, file=outputfile)
                print("After Experience ", file=outputfile)
                for i in range(len(Results2)):
                    print(str_Results2[i] + ': ', Results2[i], file=outputfile)
                print(end="\n" * 3, file=outputfile)
                print("Parameters: ", file=outputfile)
                for i in range(len(Params)):
                    print(str_Params[i] + ': ', Params[i], file=outputfile)
                outputfile.close()

                # Print out trajectory times and returns to files
                TrajTimes_filename = dir_path + 'TrajTimes1.txt'
                outputfile = open(TrajTimes_filename, 'w+')
                print(t_list1, file=outputfile)
                outputfile.close()

                Returns_filename = dir_path + 'G0list1.txt'
                outputfile = open(Returns_filename, 'w+')
                print(G0_list1, file=outputfile)
                outputfile.close()

                TrajTimes_filename = dir_path + 'TrajTimes2.txt'
                outputfile = open(TrajTimes_filename, 'w+')
                print(t_list2, file=outputfile)
                outputfile.close()

                Returns_filename = dir_path + 'G0list2.txt'
                outputfile = open(Returns_filename, 'w+')
                print(G0_list2, file=outputfile)
                outputfile.close()

                end = time.time()
                time_taken = round(end - start, 2)

                # Terminal print
                print('time_taken= ', time_taken, 's', end="\n" * 3)
                start = end   # reset timer so the next case reports its own runtime
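# Conceptual sketch of the refinement step used above: a generic epsilon-greedy tabular
# Q-learning loop. This is NOT the repo's Q_learning_Iters; the environment interface
# (env.reset/env.step), the dict-of-dicts Q layout and all constants are assumptions,
# shown only to make the one-step update rule that the refinement relies on explicit.
import random

def _sketch_q_learning(env, Q, n_iters=1000, alpha=0.5, gamma=1.0, eps_0=0.5):
    for it in range(n_iters):
        s = env.reset()
        eps = eps_0 * (1 - it / n_iters)          # linearly decaying exploration
        done = False
        while not done:
            # Epsilon-greedy action selection over the tabular Q[s] dict
            if random.random() < eps:
                a = random.choice(list(Q[s].keys()))
            else:
                a = max(Q[s], key=Q[s].get)
            s_next, r, done = env.step(a)
            # One-step Q-learning target: r + gamma * max_a' Q(s', a')
            target = r if done else r + gamma * max(Q[s_next].values())
            Q[s][a] += alpha * (target - Q[s][a])
            s = s_next
    return Q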
def plot_all_policies(g, Q, policy, init_Q, label_data, showfig=False,
                      Iters_after_update=None, full_file_path=None):
    createFolder(full_file_path)

    # Set up grid figure
    fig1 = plt.figure(figsize=(10, 10))
    ax1 = fig1.add_subplot(1, 1, 1)

    F, stream_speed, ALPHA, initq, QIters = label_data
    ax1.text(0.1, 9, 'F=(%s)' % F, fontsize=12)
    ax1.text(0.1, 8, 'ALPHA=(%s)' % ALPHA, fontsize=12)
    ax1.text(0.1, 7, 'initq=(%s)' % initq, fontsize=12)
    ax1.text(0.1, 6, 'QIters=(%s)' % QIters, fontsize=12)

    minor_xticks = np.arange(g.xs[0] - 0.5 * g.dj, g.xs[-1] + 2 * g.dj, g.dj)
    minor_yticks = np.arange(g.ys[0] - 0.5 * g.di, g.ys[-1] + 2 * g.di, g.di)
    major_xticks = np.arange(g.xs[0], g.xs[-1] + 2 * g.dj, 5 * g.dj)
    major_yticks = np.arange(g.ys[0], g.ys[-1] + 2 * g.di, 5 * g.di)

    ax1.set_xticks(minor_xticks, minor=True)
    ax1.set_yticks(minor_yticks, minor=True)
    ax1.set_xticks(major_xticks)
    ax1.set_yticks(major_yticks)
    ax1.grid(which='major', color='#CCCCCC', linestyle='')
    ax1.grid(which='minor', color='#CCCCCC', linestyle='--')

    # Mark start (green) and end (red) positions
    ax1.scatter(g.xs[g.start_state[2]], g.ys[g.ni - 1 - g.start_state[1]], c='g')
    ax1.scatter(g.xs[g.endpos[1]], g.ys[g.ni - 1 - g.endpos[0]], c='r')

    xtr = []
    ytr = []
    ax_list = []
    ay_list = []
    # Plot quiver arrows only for states whose Q-values have been updated
    for s in Q.keys():
        t, i, j = s
        for a in Q[s].keys():
            if Q[s][a] != init_Q and a == policy[s]:
                xtr.append(g.xs[j])
                ytr.append(g.ys[g.ni - 1 - i])
                ax, ay = action_to_quiver(a)
                ax_list.append(ax)
                ay_list.append(ay)

    plt.quiver(xtr, ytr, ax_list, ay_list)
    filename = full_file_path + '/policy@t'
    fig1.savefig(filename, dpi=300)
    if showfig:
        plt.show()
    return
def run_QL(setup_grid_params, QL_params, QL_path, exp_num):
    exp = QL_path

    (Training_traj_size_list, ALPHA_list, esp0_list, QL_Iters_multiplier_list, init_Q,
     with_guidance, method, num_passes_list, eps_dec_method, N_inc) = QL_params

    # Read data from files
    # setup_params (from setup_grid.py) = [num_actions, nt, dt, F, startpos, endpos]
    (g, xs, ys, X, Y, vel_field_data, nmodes, useful_num_rzns, paths,
     setup_params, setup_param_str) = setup_grid_params
    print("In TQLearn: ", len(setup_params), setup_params)
    num_actions, nt, dt, F, startpos, endpos = setup_params

    # Print QL parameters to file
    total_cases = len(Training_traj_size_list) * len(ALPHA_list) * len(esp0_list) \
                  * len(num_passes_list) * len(QL_Iters_multiplier_list)
    str_Params = ['with_guidance', 'Training_traj_size_list', 'ALPHA_list',
                  'esp0_list', 'QL_Iters_multiplier_list', 'num_actions', 'init_Q', 'dt', 'F']
    Params = [with_guidance, Training_traj_size_list, ALPHA_list, esp0_list,
              QL_Iters_multiplier_list, num_actions, init_Q, dt, F]
    Param_filename = exp + '/Params.txt'
    outputfile = open(Param_filename, 'w+')
    for i in range(len(Params)):
        print(str_Params[i] + ': ', Params[i], file=outputfile)
    outputfile.close()

    # Create sub-directories for the different hyperparameter combinations
    for num_passes in num_passes_list:
        for QL_Iters_x in QL_Iters_multiplier_list:
            for eps_0 in esp0_list:
                for ALPHA in ALPHA_list:
                    for dt_size in Training_traj_size_list:
                        directory = exp + '/num_passes_' + str(num_passes) \
                                    + '/QL_Iter_x' + str(QL_Iters_x) \
                                    + '/dt_size_' + str(dt_size) \
                                    + '/ALPHA_' + str(ALPHA) \
                                    + '/eps_0_' + str(eps_0)
                        createFolder(directory)

    case = 0  # initialise case. Each case is one experiment with a particular combination of eps_0, ALPHA and dt_size
    output_parameters_all_cases = []  # contains the output params of run_QL for all the cases
    t_start_RUN_QL = time.time()
    query_state = (58, 20, 41)

    for num_passes in num_passes_list:
        for QL_Iters_x in QL_Iters_multiplier_list:
            for eps_0 in esp0_list:
                for ALPHA in ALPHA_list:
                    for dt_size in Training_traj_size_list:
                        # test_size = useful_num_rzns - dt_size  # number of trajectories used for testing
                        t_start_case = time.time()
                        dir_path = exp + '/num_passes_' + str(num_passes) \
                                   + '/QL_Iter_x' + str(QL_Iters_x) \
                                   + '/dt_size_' + str(dt_size) \
                                   + '/ALPHA_' + str(ALPHA) \
                                   + '/eps_0_' + str(eps_0) + '/'
                        case += 1
                        QL_Iters = QL_Iters_x * dt_size
                        print("******* CASE: ", case, '/', total_cases, '*******')
                        print("num_passes= ", num_passes)
                        print("QL_Iters_x= ", QL_Iters_x)
                        print("with_guidance= ", with_guidance)
                        print('eps_0 = ', eps_0)
                        print('ALPHA =', ALPHA)
                        print('dt_size = ', dt_size)
                        print("N_inc= ", N_inc)
                        print("num_actions= ", num_actions)

                        # Get the respective indices of trajectories for training and testing
                        train_id_list, test_id_list, train_id_set, test_id_set, goodlist = \
                            get_rzn_ids_for_training_and_testing()
                        print("$$$$ check in TQ : train_id_list", train_id_list[0:20])
                        # print("test_size= ", test_size)
                        print("len_goodlist \n", len(goodlist))

                        # Reset variables and environment.
                        # (Re)initialise Q and N based on the with_guidance parameter.
                        # HCparams
                        if with_guidance:
                            # (g, init_Qval, guiding_Qval, init_Nval)
                            Q, N = initialise_guided_Q_N(g, init_Q, init_Q / 2, 1)
                        else:
                            # (g, init_Qval, init_Nval)
                            Q, N = initialise_Q_N(g, init_Q, 1)
                        g.set_state(g.start_state)
                        print("Q and N initialised!")
                        print("$$$$ CHECK Q[g.start_state]= ", Q[g.start_state])
                        print_sorted_Qs_kvs(g, Q, query_state)

                        # Learn policy from trajectory data.
                        # If trajectory data is given, learn from it; otherwise just initialise
                        # a policy and go to the refinement step. The latter becomes model-free QL.
                        if dt_size != 0:
                            # Alternative interleaved scheme, currently disabled:
                            # for n_interleave in range(Num_interleaves):
                            #     Q, N, policy, max_delQ_list_1 = Learn_policy_from_data(
                            #         paths, g, Q, N, vel_field_data, nmodes, train_id_list, N_inc,
                            #         num_actions=num_actions, ALPHA=ALPHA, method=method,
                            #         num_passes=num_passes // Num_interleaves)
                            #     Q, N, policy, max_delQ_list_2 = Q_learning_Iters(
                            #         Q, N, g, policy, vel_field_data, nmodes, train_id_list, N_inc,
                            #         alpha=ALPHA, QIters=QL_Iters // Num_interleaves,
                            #         eps_0=eps_0, eps_dec_method=eps_dec_method)
                            Q, N, policy, max_delQ_list_1 = Learn_policy_from_data(
                                paths, g, Q, N, vel_field_data, nmodes, train_id_list, N_inc,
                                num_actions=num_actions, ALPHA=ALPHA, method=method,
                                num_passes=num_passes)
                            print("Learned policy from data")

                            # Save policy
                            Policy_path = dir_path + 'Policy_01'
                            picklePolicy(policy, Policy_path)
                            print("Policy written to file")
                            # plot_max_Qvalues(Q, policy, X, Y, fpath=dir_path, fname='max_Qvalues', showfig=True)
                            print("Plotted max Qvals")

                            # Plot policy
                            label_data = [F, ALPHA, init_Q, QL_Iters]
                            QL_params_plot = policy, Q, init_Q, label_data, dir_path, 'pol_plot_1'
                            plot_learned_policy(g, QL_params=QL_params_plot)
                            # plot_all_policies(g, Q, policy, init_Q, label_data, full_file_path=Fig_policy_path)

                            # Plot max_delQ
                            plot_max_delQs(max_delQ_list_1, filename=dir_path + 'delQplot1')
                            print("plotted learned policy and max_delQs")
                            print_sorted_Qs_kvs(g, Q, query_state)
                        else:
                            if with_guidance:
                                policy = initialise_policy_from_initQ(Q)
                            else:
                                policy = initialise_policy(g)

                        # Times and trajectories based on data and/or guidance
                        t_list1, G0_list1, bad_count1 = plot_exact_trajectory_set(
                            g, policy, X, Y, vel_field_data, nmodes,
                            train_id_set, test_id_set, goodlist,
                            fpath=dir_path, fname='Trajectories_before_exp')
                        print("plotted exact trajectory set")

                        # Policy refinement step (learn from experience), currently disabled:
                        # Q, N, policy, max_delQ_list_2 = Q_learning_Iters(
                        #     Q, N, g, policy, vel_field_data, nmodes, train_id_list, N_inc,
                        #     alpha=ALPHA, QIters=QL_Iters, eps_0=eps_0,
                        #     eps_dec_method=eps_dec_method)
                        print("Policy refined")

                        # Save updated policy
                        Policy_path = dir_path + 'Policy_02'
                        picklePolicy(policy, Policy_path)
                        QL_params_plot = policy, Q, init_Q, label_data, dir_path, 'pol_plot_2'
                        plot_learned_policy(g, QL_params=QL_params_plot)
                        print("Refined policy written to file")

                        # Plots after experience
                        # plot_max_delQs(max_delQ_list_2, filename=dir_path + 'delQplot2')
                        t_list2, G0_list2, bad_count2 = plot_exact_trajectory_set(
                            g, policy, X, Y, vel_field_data, nmodes,
                            train_id_set, test_id_set, goodlist,
                            fpath=dir_path, fname='Trajectories_after_exp')
                        t_list3, G0_list3, bad_count3 = plot_and_return_exact_trajectory_set_train_data(
                            g, policy, X, Y, vel_field_data, nmodes, train_id_list,
                            fpath=dir_path, fname='Train_Trajectories_after_exp')
                        t_list4, G0_list4, bad_count4 = plot_and_return_exact_trajectory_set_train_data(
                            g, policy, X, Y, vel_field_data, nmodes, test_id_list,
                            fpath=dir_path, fname='Test_Trajectories_after_exp')
                        print("plotted max delQs and exact traj set AFTER REFINEMENT")
                        picklePolicy(Q, dir_path + 'Q2')
                        picklePolicy(N, dir_path + 'N2')
                        print_sorted_Qs_kvs(g, Q, query_state)

                        # Results to be printed
                        avg_time1, std_time1, cnt1, none_cnt1, none_cnt_perc1 = \
                            calc_mean_and_std_train_test(t_list1, train_id_set, test_id_set)
                        avg_G01, _, _, _, _ = calc_mean_and_std_train_test(
                            G0_list1, train_id_set, test_id_set)
                        avg_time2, std_time2, cnt2, none_cnt2, none_cnt_perc2 = \
                            calc_mean_and_std_train_test(t_list2, train_id_set, test_id_set)
                        avg_G02, _, _, _, _ = calc_mean_and_std_train_test(
                            G0_list2, train_id_set, test_id_set)

                        overall_bad_count1 = 'dummy_init'
                        overall_bad_count2 = 'dummy_init'
                        if QL_Iters != 0:
                            overall_bad_count1 = (bad_count1, str(bad_count1 * 100 / dt_size) + '%')
                            overall_bad_count2 = (bad_count2, str(bad_count2 * 100 / dt_size) + '%')

                        t_end_case = time.time()
                        case_runtime = round((t_end_case - t_start_case) / 60, 2)  # mins

                        # Print results to file
                        picklePolicy(train_id_list, dir_path + 'train_id_list')
                        picklePolicy(test_id_list, dir_path + 'test_id_list')

                        str_Results1 = ['avg_time1', 'std_time1', 'overall_bad_count1', 'avg_G01']
                        Results1 = [avg_time1, std_time1, overall_bad_count1, avg_G01]
                        str_Results2 = ['avg_time2', 'std_time2', 'overall_bad_count2', 'avg_G02']
                        Results2 = [avg_time2, std_time2, overall_bad_count2, avg_G02]

                        Result_filename = dir_path + 'Results.txt'
                        outputfile = open(Result_filename, 'w+')
                        print("Before Experience ", file=outputfile)
                        for i in range(len(Results1)):
                            print(str_Results1[i] + ': ', Results1[i], file=outputfile)
                        print(end="\n" * 3, file=outputfile)
                        print("After Experience ", file=outputfile)
                        for i in range(len(Results2)):
                            print(str_Results2[i] + ': ', Results2[i], file=outputfile)
                        print(end="\n" * 3, file=outputfile)
                        print("Parameters: ", file=outputfile)
                        for i in range(len(Params)):
                            print(str_Params[i] + ': ', Params[i], file=outputfile)
                        outputfile.close()

                        # Print out trajectory times and returns to files
                        TrajTimes_filename = dir_path + 'TrajTimes1.txt'
                        outputfile = open(TrajTimes_filename, 'w+')
                        print(t_list1, file=outputfile)
                        outputfile.close()

                        Returns_filename = dir_path + 'G0list1.txt'
                        outputfile = open(Returns_filename, 'w+')
                        print(G0_list1, file=outputfile)
                        outputfile.close()

                        TrajTimes_filename = dir_path + 'TrajTimes2.txt'
                        outputfile = open(TrajTimes_filename, 'w+')
                        print(t_list2, file=outputfile)
                        outputfile.close()

                        Returns_filename = dir_path + 'G0list2.txt'
                        outputfile = open(Returns_filename, 'w+')
                        print(G0_list2, file=outputfile)
                        outputfile.close()

                        output_parameters_ith_case = [
                            exp_num, method, num_actions, nt, dt, F, startpos, endpos,
                            eps_0, ALPHA, eps_dec_method, N_inc, dt_size, with_guidance,
                            init_Q, num_passes, QL_Iters,
                            avg_time1[0], std_time1[0], avg_G01[0], none_cnt1[0], cnt1[0], none_cnt_perc1[0],  # train stats
                            avg_time2[0], std_time2[0], avg_G02[0], none_cnt2[0], cnt2[0], none_cnt_perc2[0],  # train stats
                            avg_time1[1], std_time1[1], avg_G01[1], none_cnt1[1], cnt1[1], none_cnt_perc1[1],  # test stats
                            avg_time2[1], std_time2[1], avg_G02[1], none_cnt2[1], cnt2[1], none_cnt_perc2[1],  # test stats
                            overall_bad_count1, overall_bad_count2, case_runtime
                        ]
                        # Summary columns: Exp No, Method, Num_actions, nt, dt, F, start_pos, end_pos,
                        # Eps_0, ALPHA, dt_size_(train_size), V[start_pos], Mean_Time_over_5k,
                        # Variance_Over_5K, Bad Count, DP_comput_time, Mean_Glist

                        # The per-case list is still accumulated even though the summary row is appended here itself.
                        output_parameters_all_cases.append(output_parameters_ith_case)
                        print("output_parameters_ith_case\n")
                        print(output_parameters_ith_case)
                        append_summary_to_summaryFile(
                            join(ROOT_DIR, 'Experiments/Exp_summary_QL.csv'),
                            output_parameters_ith_case)
                        picklePolicy(output_parameters_ith_case, join(dir_path, 'output_parameters'))

                        RUN_QL_elapsed_time = round((time.time() - t_start_RUN_QL) / 60, 2)

                        # Terminal print
                        print('Case_runtime= ', case_runtime)
                        print('RUN_QL_elapsed_time= ', RUN_QL_elapsed_time, ' mins', end="\n" * 3)

    t_end_RUN_QL = time.time()
    RUN_QL_runtime = round((t_end_RUN_QL - t_start_RUN_QL) / 60, 2)
    print("RUN_QL_runtime: ", RUN_QL_runtime, " mins")

    return output_parameters_all_cases
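# Hedged usage sketch (illustration only, never called): building a QL_params list whose
# ordering matches the unpacking at the top of run_QL above. All numeric values and the
# 'method'/'eps_dec_method' labels are assumptions; setup_grid_params is whatever setup_grid() returns.
def _example_run_QL(setup_grid_params, QL_path, exp_num):
    QL_params = [
        [2500],            # Training_traj_size_list
        [0.5],             # ALPHA_list
        [0.5],             # esp0_list
        [2],               # QL_Iters_multiplier_list (QL_Iters = multiplier * dt_size)
        -1000000,          # init_Q
        True,              # with_guidance
        'reverse_order',   # method (assumed label)
        [1],               # num_passes_list
        'exp_decay',       # eps_dec_method (assumed label)
        1,                 # N_inc
    ]
    return run_QL(setup_grid_params, QL_params, QL_path, exp_num)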
def plot_learned_policy(g, DP_params=None, QL_params=None, vel_field_data=None, showfig=False):
    """
    Plots a learned policy.
    :param g: grid object
    :param DP_params: [policy, filepath]
    :param QL_params: [policy, Q, init_Q, label_data, filepath, fname] - details below
    :param vel_field_data: if provided (DP case), the "net" mean vector (action + flow) is also plotted
    :param showfig: whether to show the figure during execution
    :return:

    QL_params details:
    :param policy: learned policy
    :param Q: learned Q against which the policy is plotted. Needed only as a check in the QL
              case, to plot the policy only at states that have been updated
    :param init_Q: initial Q value. Like Q, required only for the QL policy plot
    :param label_data: labels to put on the figure. Currently required only for QL
    """
    # TODO: check QL part for this DG3
    if DP_params is None and QL_params is None:
        print("Nothing to plot! Enter either DP or QL params !")
        return

    # Set up grid figure
    fig1 = plt.figure(figsize=(10, 10))
    ax1 = fig1.add_subplot(1, 1, 1)

    minor_xticks = np.arange(g.xs[0] - 0.5 * g.dj, g.xs[-1] + 2 * g.dj, g.dj)
    minor_yticks = np.arange(g.ys[0] - 0.5 * g.di, g.ys[-1] + 2 * g.di, g.di)
    major_xticks = np.arange(g.xs[0], g.xs[-1] + 2 * g.dj, 5 * g.dj)
    major_yticks = np.arange(g.ys[0], g.ys[-1] + 2 * g.di, 5 * g.di)

    ax1.set_xticks(minor_xticks, minor=True)
    ax1.set_yticks(minor_yticks, minor=True)
    ax1.set_xticks(major_xticks)
    ax1.set_yticks(major_yticks)
    ax1.grid(which='major', color='#CCCCCC', linestyle='')
    ax1.grid(which='minor', color='#CCCCCC', linestyle='--')

    xtr = []
    ytr = []
    ax_list = []
    ay_list = []

    if QL_params is not None:
        policy, Q, init_Q, label_data, full_file_path, fname = QL_params
        F, ALPHA, initq, QIters = label_data
        ax1.text(0.1, 9, 'F=(%s)' % F, fontsize=12)
        ax1.text(0.1, 8, 'ALPHA=(%s)' % ALPHA, fontsize=12)
        ax1.text(0.1, 7, 'initq=(%s)' % initq, fontsize=12)
        ax1.text(0.1, 6, 'QIters=(%s)' % QIters, fontsize=12)

        for s in Q.keys():
            t, i, j = s
            a = policy[s]
            # Plot the policy only at states whose Q-values have been updated
            if not (Q[s][a] == init_Q / 2 or Q[s][a] == init_Q):
                xtr.append(g.xs[j])
                ytr.append(g.ys[g.ni - 1 - i])
                ax, ay = action_to_quiver(a)
                ax_list.append(ax)
                ay_list.append(ay)

        plt.quiver(xtr, ytr, ax_list, ay_list)
        ax1.scatter(g.xs[g.start_state[2]], g.ys[g.ni - 1 - g.start_state[1]], c='g')
        ax1.scatter(g.xs[g.endpos[1]], g.ys[g.ni - 1 - g.endpos[0]], c='r')
        fig1.savefig(full_file_path + fname + '.png', dpi=150)
        if showfig:
            plt.show()
        plt.cla()
        plt.close(fig1)

    if DP_params is not None:
        policy, full_file_path = DP_params
        policy_plot_folder = createFolder(join(full_file_path, 'policy_plots'))

        for tt in range(g.nt - 1):
            ax_list = []
            ay_list = []
            vnetx_list = []
            vnety_list = []
            xtr = []
            ytr = []
            for s in g.ac_state_space(time=tt):
                a = policy[s]
                t, i, j = s
                xtr.append(g.xs[j])
                ytr.append(g.ys[g.ni - 1 - i])
                ax, ay = action_to_quiver(a)
                # If vel_field_data is provided, the "net" mean vector (action + flow) is also plotted.
                if vel_field_data is not None:
                    vx = vel_field_data[0][t, i, j]
                    vy = vel_field_data[1][t, i, j]
                    vnetx = ax + vx
                    vnety = ay + vy
                    vnetx_list.append(vnetx)
                    vnety_list.append(vnety)
                ax_list.append(ax)
                ay_list.append(ay)

            plt.quiver(xtr, ytr, ax_list, ay_list)
            ax1.scatter(g.xs[g.start_state[2]], g.ys[g.ni - 1 - g.start_state[1]], c='g')
            ax1.scatter(g.xs[g.endpos[1]], g.ys[g.ni - 1 - g.endpos[0]], c='r')
            if showfig:
                plt.show()
            fig1.savefig(full_file_path + '/policy_plots/policy_plot_t' + str(tt), dpi=150)
            plt.clf()
            fig1.clf()

            if vel_field_data is not None:
                plt.quiver(xtr, ytr, vnetx_list, vnety_list)
                ax1.scatter(g.xs[g.start_state[2]], g.ys[g.ni - 1 - g.start_state[1]], c='g')
                ax1.scatter(g.xs[g.endpos[1]], g.ys[g.ni - 1 - g.endpos[0]], c='r')
                fig1.savefig(full_file_path + '/policy_plots/vnet_plot_t' + str(tt), dpi=150)
                plt.clf()
                fig1.clf()

    return
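# Hedged usage sketch (illustration only, never called): plotting a DP policy with the mean
# flow overlaid. 'Experiments/1/DP' is a placeholder path, and vel_field_data is assumed to be
# indexable as vel_field_data[0][t, i, j] / vel_field_data[1][t, i, j], as in the DP branch above.
def _example_plot_DP_policy(g, policy, vel_field_data):
    DP_params = [policy, 'Experiments/1/DP']   # [policy, filepath]
    plot_learned_policy(g, DP_params=DP_params, vel_field_data=vel_field_data, showfig=False)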