Beispiel #1
0
# Per-agent post-processing: average the stored results over the `times`
# repetitions, pickle the averages under results/<run>/, then evaluate each
# individual repetition.
array_num_stps_eps = {}
array_re_all = np.zeros((n_agents, times, num_stp_int))
# Same shape and dtype as array_re_all; not written in this part of the script.
array_re_max = np.zeros_like(array_re_all)
run_dir = 'results/' + str(run)

for i in range(n_agents):
    # Mean over repetitions of this agent's Q_list entries.
    array_re[i] = np.mean([Q_list[i, k] for k in range(times)], axis=0)

    # Mean over repetitions of Num_stps_per_eps, truncated to int and used
    # as repeat counts: index e appears int(mean[e]) times in the result.
    mean_stps = np.mean(
        [Num_stps_per_eps[i, k] for k in range(times)], axis=0)
    array_num_stps_eps[i] = np.repeat(
        np.arange(len(mean_stps)), mean_stps.astype(int))

    with open(run_dir + '/array_re_' + str(i) + '.pkl', 'wb') as out:
        pickle.dump(array_re[i], out, protocol=2)
    with open(run_dir + '/array_num_stps_eps_' + str(i) + '.pkl',
              'wb') as out:
        pickle.dump(array_num_stps_eps[i], out, protocol=2)

    for j in range(times):
        q_run = Q_list[i, j]
        print(i, j)
        # Both helpers are defined elsewhere; only element [0] of what they
        # return is stored.
        rel_err_result = relative_error_plot(NUM_EPISODES_def, Qstar, q_run)
        array_re_all[i, j, :] = rel_err_result[0]
        perf_result = plot_performance_curves(NUM_EPISODES_def, env, q_run)
        array[i, j, :] = perf_result[0]


# Persist the complete `array` for this run.
run_dir = 'results/' + str(run)
with open(run_dir + '/array.pkl', 'wb') as out:
    pickle.dump(array, out, protocol=2)

# One averaged row per agent (mean of array[i] along its axis 0), each also
# written to its own pickle file.
array_mean = np.zeros((n_agents, num_stp_int))
for i in range(n_agents):
    array_mean[i] = np.mean(array[i], axis=0)
    with open(run_dir + '/array_mean_' + str(i) + '.pkl', 'wb') as out:
        pickle.dump(array_mean[i], out, protocol=2)

# Constant reference series: performance_plot(env, Qstar) is evaluated once
# and that single result fills all num_stp_int slots.
qstar_value = performance_plot(env, Qstar)
list_qstar = [qstar_value] * num_stp_int
plots('results',[array_mean[0], array_mean[1], list_qstar, array_mean[2], array_mean[3],array_mean[4]],
      ['LBQL','QL','OP', 'Double-QL','SQL', 'BCQL'],
      ['r','b', 'y', 'g', 'c','m'],
      'Episodes', 
Beispiel #2
0
                            f,
                            protocol=2)
        else:
            Q_list[i, j], elapsed_time[i, j] = agent.train()
            with open(
                    'results/' + str(run) + '/' + str(i) + '_' + str(j) +
                    '.pkl', 'wb') as f:
                pickle.dump([Q_list[i, j], elapsed_time[i, j]], f, protocol=2)

# Length of the last axis of `array`: NUM_STEPS_def / at, rounded up.
num_stp_int = int(np.ceil(NUM_STEPS_def / at))
array = np.zeros((n_agents, times, num_stp_int))

# Walk every (agent, repetition) index pair in row-major order, i.e. the
# same order as a nested `for i ... for j ...` loop.
for i, j in np.ndindex(n_agents, times):
    q_run = Q_list[i, j]
    print(i, j)
    # plot_performance_curves is defined elsewhere; only element [0] of its
    # return value is stored.
    curves = plot_performance_curves(at, NUM_STEPS_def, env, q_run)
    array[i, j, :] = curves[0]

# Save the filled array for this run.
run_dir = 'results/' + str(run)
with open(run_dir + '/array.pkl', 'wb') as out:
    pickle.dump(array, out, protocol=2)
# Open a new figure; the handle is kept in fig1.
fig1 = plt.figure()

# Flatten the 3-D `array` (axis 0: algorithm, axis 1: repetition, axis 2:
# epoch) into a long-format table with one row per element. The three columns
# must follow the same row-major order that array.ravel() produces.
algo_labels = ['LBQL', 'QL', 'SQL', 'Double-QL', 'BCQL']
if len(algo_labels) != array.shape[0]:
    # Without this check the mismatch only surfaces later as an opaque
    # pandas length error.
    raise ValueError(
        'expected %d algorithm labels, got %d'
        % (array.shape[0], len(algo_labels)))

# Epoch index cycles fastest: 0..E-1 repeated for every (algorithm, repetition).
Epoch = np.tile(np.arange(0, array.shape[2]),
                (array.shape[0] * array.shape[1]))
# Each label covers one full block of repetitions x epochs.
Alg = np.repeat(algo_labels, array.shape[1] * array.shape[2])
Returns = array.ravel()

# Build the frame in one step; `columns` pins the column order explicitly.
df = pd.DataFrame(
    {'Epoch': Epoch, 'Algo': Alg, 'Reward': Returns},
    columns=['Epoch', 'Algo', 'Reward'])