コード例 #1
0
def _dump_result(stem, payload):
    """Pickle ``payload`` into ``results/<run>/<stem>.pkl`` with protocol 2."""
    target = 'results/' + str(run) + '/' + stem + '.pkl'
    with open(target, 'wb') as handle:
        pickle.dump(payload, handle, protocol=2)


# Per-agent aggregates (dicts keyed by agent index) and per-(agent, repetition)
# curve buffers of shape (n_agents, times, num_stp_int).
array_re = {}
array_num_stps_eps = {}
curve_shape = (n_agents, times, num_stp_int)
array_re_all = np.zeros(curve_shape)
array_re_max = np.zeros(curve_shape)

for i in range(n_agents):
    # Element-wise mean of this agent's Q_list entries over all repetitions.
    array_re[i] = np.mean([Q_list[i, m] for m in range(times)], axis=0)

    # Mean of the Num_stps_per_eps entries over repetitions, then expanded so
    # that each index k is repeated int(mean_steps[k]) times.
    mean_steps = np.mean(
        [Num_stps_per_eps[i, m] for m in range(times)], axis=0)
    array_num_stps_eps[i] = np.repeat(
        np.arange(len(mean_steps)), mean_steps.astype(int))

    _dump_result('array_re_' + str(i), array_re[i])
    _dump_result('array_num_stps_eps_' + str(i), array_num_stps_eps[i])

    for j in range(times):
        ls = Q_list[i, j]
        print(i, j)
        # Only the first element returned by each helper is stored.
        relative_error = relative_error_plot(NUM_EPISODES_def, Qstar, ls)
        array_re_all[i, j, :] = relative_error[0]
        performance = plot_performance_curves(NUM_EPISODES_def, env, ls)
        array[i, j, :] = performance[0]

# Persist the full curve array, then its per-agent mean over repetitions.
_dump_result('array', array)

array_mean = np.zeros((n_agents, num_stp_int))
for i in range(n_agents):
    array_mean[i] = np.mean(array[i], axis=0)
    _dump_result('array_mean_' + str(i), array_mean[i])

# Constant reference line: the same performance_plot(env, Qstar) value
# repeated once per interval.
qstar_performance = performance_plot(env, Qstar)
list_qstar = [qstar_performance] * num_stp_int
plots('results',[array_mean[0], array_mean[1], list_qstar, array_mean[2], array_mean[3],array_mean[4]],
      ['LBQL','QL','OP', 'Double-QL','SQL', 'BCQL'],
      ['r','b', 'y', 'g', 'c','m'],
コード例 #2
0
ファイル: run.py プロジェクト: zhouforst/LBQL_ICML2020
                    '.pkl', 'wb') as f:
                pickle.dump([Q_list[i, j], elapsed_time[i, j]], f, protocol=2)

def _dump_result(stem, payload):
    """Pickle ``payload`` into ``results/<run>/<stem>.pkl`` with protocol 2."""
    target = 'results/' + str(run) + '/' + stem + '.pkl'
    with open(target, 'wb') as handle:
        pickle.dump(payload, handle, protocol=2)


# Number of intervals of length `at` needed to cover NUM_STEPS_def (rounded up).
num_stp_int = int(np.ceil(NUM_STEPS_def / at))

# Curve buffers indexed by (agent, repetition, interval).
curve_shape = (n_agents, times, num_stp_int)
array = np.zeros(curve_shape)
array_re = {}
array_re_all = np.zeros(curve_shape)

for i in range(n_agents):
    # Element-wise mean of this agent's Q_list entries over all repetitions.
    array_re[i] = np.mean([Q_list[i, m] for m in range(times)], axis=0)
    _dump_result('array_re_' + str(i), array_re[i])

    for j in range(times):
        ls = Q_list[i, j]
        print(i, j)
        # Only the first element returned by each helper is stored.
        relative_error = relative_error_plot(at, NUM_STEPS_def, Qstar, ls)
        array_re_all[i, j, :] = relative_error[0]
        performance = plot_performance_curves(at, NUM_STEPS_def, env, ls)
        array[i, j, :] = performance[0]

# Persist the full curve array, then its per-agent mean over repetitions.
_dump_result('array', array)

array_mean = np.zeros((n_agents, num_stp_int))
for i in range(n_agents):
    array_mean[i] = np.mean(array[i], axis=0)
    _dump_result('array_mean_' + str(i), array_mean[i])

# Constant reference line: the same performance_plot(env, Qstar) value
# repeated once per interval.
qstar_performance = performance_plot(env, Qstar)
list_qstar = [qstar_performance] * num_stp_int
plots('results', [
    array_mean[0], array_mean[1], list_qstar, array_mean[2], array_mean[3],