# Post-processing fragment: aggregates per-agent results and pickles them under 'results/<run>/'.
# NOTE(review): line breaks and indentation were lost in this text, so which statements sit inside
# which `for`/`with` body cannot be read off the line itself -- confirm against the original layout.
# Steps, in the order they appear:
#   * array_re[i]: np.mean over m in range(times) of Q_list[i, m] (axis=0), pickled to
#     'array_re_<i>.pkl'.
#   * array_num_stps_eps[i]: np.mean over m of Num_stps_per_eps[i, m] (axis=0), then replaced by
#     np.repeat(np.arange(len(avg)), avg.astype(int)), i.e. index k repeated int(avg[k]) times;
#     pickled to 'array_num_stps_eps_<i>.pkl'.
#   * for every (i, j): element [0] of relative_error_plot(NUM_EPISODES_def, Qstar, ls) is stored in
#     array_re_all[i, j, :] and element [0] of plot_performance_curves(NUM_EPISODES_def, env, ls)
#     in array[i, j, :], with ls = Q_list[i, j].
#   * `array` is pickled to 'array.pkl'; array_mean[i] = np.mean(array[i], axis=0) is pickled to
#     'array_mean_<i>.pkl'.
#   * list_qstar repeats the single value performance_plot(env, Qstar) num_stp_int times.
# All pickle dumps use protocol=2.
# NOTE(review): `array` is not assigned anywhere in this fragment -- presumably allocated earlier;
# verify. `array_re_max` is allocated here but not referenced again within this fragment.
# The trailing `plots(...)` call is cut off; its remaining arguments are not visible here.
array_re = {} array_num_stps_eps = {} array_re_all = np.zeros((n_agents,times,num_stp_int)) array_re_max = np.zeros((n_agents,times,num_stp_int)) for i in range(n_agents): array_re[i] = np.mean([Q_list[i,m] for m in range(times)], axis=0) array_num_stps_eps[i] = np.mean([Num_stps_per_eps[i,m] for m in range(times)], axis=0) array_num_stps_eps[i] = np.repeat(np.arange(len(array_num_stps_eps[i])), array_num_stps_eps[i].astype(int)) with open('results/'+str(run)+'/array_re_'+str(i)+'.pkl', 'wb') as f: pickle.dump(array_re[i], f,protocol=2) with open('results/'+str(run)+'/array_num_stps_eps_'+str(i)+'.pkl', 'wb') as f: pickle.dump(array_num_stps_eps[i], f,protocol=2) for j in range(times): ls = Q_list[i,j] print(i,j) array_re_all[i,j,:] = relative_error_plot(NUM_EPISODES_def, Qstar, ls)[0] array[i,j,:]=plot_performance_curves(NUM_EPISODES_def,env, ls)[0] with open('results/'+str(run)+'/array.pkl', 'wb') as f: pickle.dump(array, f,protocol=2) array_mean = np.zeros((n_agents,num_stp_int)) for i in range(n_agents): array_mean[i]= np.mean(array[i], axis=0) with open('results/'+str(run)+'/array_mean_'+str(i)+'.pkl', 'wb') as f: pickle.dump(array_mean[i], f,protocol=2) list_qstar = [performance_plot(env, Qstar)]*num_stp_int plots('results',[array_mean[0], array_mean[1], list_qstar, array_mean[2], array_mean[3],array_mean[4]], ['LBQL','QL','OP', 'Double-QL','SQL', 'BCQL'], ['r','b', 'y', 'g', 'c','m'],
# Post-processing fragment. It starts in the middle of an `open(...)` call whose beginning is not
# visible, and ends inside an unfinished `plots(...)` call.
# NOTE(review): line breaks and indentation were lost in this text, so loop/`with` nesting cannot be
# read off the line itself -- confirm against the original layout.
# Steps, in the order they appear:
#   * the pair [Q_list[i, j], elapsed_time[i, j]] is pickled to the file opened by the truncated call.
#   * num_stp_int = int(np.ceil(NUM_STEPS_def / at)); `array` and `array_re_all` are zero arrays of
#     shape (n_agents, times, num_stp_int).
#   * array_re[i]: np.mean over m in range(times) of Q_list[i, m] (axis=0), pickled to
#     'results/<run>/array_re_<i>.pkl'.
#   * for every (i, j): element [0] of relative_error_plot(at, NUM_STEPS_def, Qstar, ls) is stored in
#     array_re_all[i, j, :] and element [0] of plot_performance_curves(at, NUM_STEPS_def, env, ls)
#     in array[i, j, :], with ls = Q_list[i, j].
#   * `array` is pickled to 'array.pkl'; array_mean[i] = np.mean(array[i], axis=0) is pickled to
#     'array_mean_<i>.pkl'.
#   * list_qstar repeats the single value performance_plot(env, Qstar) num_stp_int times.
# All pickle dumps use protocol=2.
# NOTE(review): as written on one physical line, everything after `# plt.plot(` is lexically part of
# that comment, including the `list_qstar` assignment and the `plots(` call. Presumably the original
# had a newline after the commented-out plt.plot call -- confirm when restoring line breaks.
'.pkl', 'wb') as f: pickle.dump([Q_list[i, j], elapsed_time[i, j]], f, protocol=2) num_stp_int = int(np.ceil(NUM_STEPS_def / at)) array = np.zeros((n_agents, times, num_stp_int)) array_re = {} array_re_all = np.zeros((n_agents, times, num_stp_int)) for i in range(n_agents): array_re[i] = np.mean([Q_list[i, m] for m in range(times)], axis=0) with open('results/' + str(run) + '/array_re_' + str(i) + '.pkl', 'wb') as f: pickle.dump(array_re[i], f, protocol=2) for j in range(times): ls = Q_list[i, j] print(i, j) array_re_all[i, j, :] = relative_error_plot(at, NUM_STEPS_def, Qstar, ls)[0] array[i, j, :] = plot_performance_curves(at, NUM_STEPS_def, env, ls)[0] with open('results/' + str(run) + '/array.pkl', 'wb') as f: pickle.dump(array, f, protocol=2) array_mean = np.zeros((n_agents, num_stp_int)) for i in range(n_agents): array_mean[i] = np.mean(array[i], axis=0) with open('results/' + str(run) + '/array_mean_' + str(i) + '.pkl', 'wb') as f: pickle.dump(array_mean[i], f, protocol=2) # plt.plot(array_mean[i], label=str(i)) list_qstar = [performance_plot(env, Qstar)] * num_stp_int plots('results', [ array_mean[0], array_mean[1], list_qstar, array_mean[2], array_mean[3],