def exploit_nstep(params) -> None:
    """
    Plot the saved n-step curves for several metrics, then a per-N summary.

    For each metric in 'policy_loss', 'critic_loss', 'reward' and 'duration',
    plot_data is called on the file saved under <cwd>/data/save for every N in
    [1, 5, 10, 15, 20]; the resulting figure is saved as a PDF under
    <cwd>/data/save/../results and shown. A second figure then plots, against
    N, the last element of each of the two sequences returned by plot_data
    (unpacked here as mean and std, drawn with the labels "bias" and
    "variance"), and is saved and shown as well.

    :param params: experiment parameters; env_name is read directly and the
        whole object is handed to make_full_string to build output file names
    :return: nothing
    """
    save_dir = os.getcwd() + "/data/save"
    n_values = [1, 5, 10, 15, 20]

    for metric in ("policy_loss", "critic_loss", "reward", "duration"):
        final_means = []
        final_stds = []

        # One curve per N; keep only the last point of each returned series.
        for n in n_values:
            tag = metric + "_nstep_" + str(n)
            mean, std = plot_data(save_dir + "/" + tag + "_" + params.env_name + ".txt", tag)
            final_means.append(mean[-1])
            final_stds.append(std[-1])

        # Finalize the figure holding the curves of this metric.
        plt.title(params.env_name)
        plt.xlabel("Episodes")
        plt.ylabel(metric)
        plt.legend(loc="lower right")
        plt.savefig(save_dir + "/../results/" + metric + "_nstep_" + make_full_string(params) + ".pdf")
        plt.show()

        # Summary figure: final values as a function of N.
        plt.plot(n_values, final_means, label="bias")
        plt.plot(n_values, final_stds, label="variance")
        plt.title(params.env_name)
        plt.xlabel("N in N-step")
        plt.ylabel("variance, bias")
        plt.legend(loc="lower right")
        plt.savefig(save_dir + "/../results/bias_variance_" + metric + "_" + make_full_string(params) + ".pdf")
        plt.show()
def exploit_nstep_diff(params) -> None:
    """
    Plot the saved "diff" curves for several N, then a per-N summary.

    plot_data is called on <cwd>/data/save/diff_<N>_<env_name>.txt for every
    N in [1, 5, 10, 20]; the resulting figure is saved as a PDF under
    <cwd>/data/save/../results and shown. A second figure then plots, against
    N, the last element of each of the two sequences returned by plot_data
    (unpacked here as mean and std, drawn with the labels "bias" and
    "variance"), and is saved and shown as well.

    :param params: experiment parameters; env_name is read directly and the
        whole object is handed to make_full_string to build output file names
    :return: nothing
    """
    save_dir = os.getcwd() + "/data/save"
    n_values = [1, 5, 10, 20]
    final_means = []
    final_stds = []

    # One curve per N; keep only the last point of each returned series.
    for n in n_values:
        data_file = save_dir + "/diff_" + str(n) + "_" + params.env_name + ".txt"
        mean, std = plot_data(data_file, "nstep_" + str(n))
        final_means.append(mean[-1])
        final_stds.append(std[-1])

    # Finalize the figure holding all the diff curves.
    plt.title(params.env_name)
    plt.xlabel("Episodes")
    plt.ylabel("diff")
    plt.legend(loc="lower right")
    plt.savefig(save_dir + "/../results/diff_nstep_" + make_full_string(params) + ".pdf")
    plt.show()

    # Summary figure: final values as a function of N.
    plt.plot(n_values, final_means, label="bias")
    plt.plot(n_values, final_stds, label="variance")
    plt.title(params.env_name)
    plt.xlabel("N in N-step")
    plt.ylabel("variance, bias")
    plt.legend(loc="lower right")
    plt.savefig(save_dir + "/../results/bias_variance_" + make_full_string(params) + ".pdf")
    plt.show()
def plot_beta_results(params) -> None:
    """
    Plot the saved reward curves for a fixed set of beta values.

    plot_data is called on <cwd>/data/save/reward_<beta>_<env_name>.txt for
    beta in [0.1, 0.5, 1.0, 5.0, 10.0]; the figure is then saved as a PDF
    under <cwd>/data/save/../results and shown.

    :param params: experiment parameters; env_name is read directly and the
        whole object is handed to make_full_string to build the output name
    :return: nothing
    """
    save_dir = os.getcwd() + "/data/save"

    for beta in (0.1, 0.5, 1.0, 5.0, 10.0):
        beta_text = str(beta)
        data_file = save_dir + "/reward_" + beta_text + "_" + params.env_name + ".txt"
        plot_data(data_file, "reward " + beta_text)

    plt.title(params.env_name)
    plt.xlabel("Episodes")
    plt.ylabel("Reward")
    plt.legend(loc="lower right")
    plt.savefig(save_dir + "/../results/rewards_" + make_full_string(params) + ".pdf")
    plt.show()
def exploit_cov_full(params) -> None:
    """
    Plot the saved covariance-norm curve and store the figure as a PDF.

    plot_data is called on <cwd>/data/save/covariance_.txt once per entry of
    params.gradients; the figure is saved under <cwd>/data/save/../results
    and the current figure is then cleared (it is not shown).

    :param params: experiment parameters; gradients and env_name are read
        directly and the whole object is handed to make_full_string
    :return: nothing
    """
    save_dir = os.getcwd() + "/data/save"
    study = params.gradients

    # NOTE(review): the file name and label do not depend on the study entry,
    # so the same curve is drawn len(study) times — confirm this is intended.
    for _ in range(len(study)):
        plot_data(save_dir + "/covariance_" + ".txt", "cem covariance norm ")

    plt.title(params.env_name)
    plt.xlabel("Episodes")
    plt.ylabel("Covariance Norm")
    plt.legend(loc="best")
    plt.savefig(save_dir + "/../results/covariance_" + make_full_string(params) + ".pdf")
    plt.clf()
def exploit_reward_full(params) -> None:
    """
    Plot the saved reward curve of every studied gradient and show the figure.

    For each entry g of params.gradients, plot_data is called on
    <cwd>/data/save/reward_<study_name><g>_<env_name>.txt; the figure is then
    saved as a PDF under <cwd>/data/save/../results and shown.

    NOTE(review): a function with the same name is defined again later in
    this file; at import time that later definition replaces this one.

    :param params: experiment parameters; gradients, study_name and env_name
        are read directly and the whole object is handed to make_full_string
    :return: nothing
    """
    save_dir = os.getcwd() + "/data/save"

    for gradient in params.gradients:
        data_file = save_dir + "/reward_" + params.study_name + gradient + "_" + params.env_name + ".txt"
        curve_label = params.study_name + "_" + "reward " + gradient
        plot_data(data_file, curve_label)

    plt.title(params.env_name)
    plt.xlabel("Episodes")
    plt.ylabel("Reward")
    plt.legend(loc="best")
    plt.savefig(save_dir + "/../results/rewards_" + make_full_string(params) + ".pdf")
    plt.show()
def exploit_duration_full(params) -> None:
    """
    Plot the saved duration curve of every studied gradient and show the figure.

    For each entry g of params.gradients, plot_data is called on
    <cwd>/data/save/duration_<g>_<env_name>.txt; the figure is then saved
    under <cwd>/data/save/../results with a name ending in 'pg.pdf' and shown.

    :param params: experiment parameters; gradients and env_name are read
        directly and the whole object is handed to make_full_string
    :return: nothing
    """
    save_dir = os.getcwd() + "/data/save"

    for gradient in params.gradients:
        data_file = save_dir + "/duration_" + gradient + "_" + params.env_name + ".txt"
        plot_data(data_file, "duration " + gradient)

    plt.xlabel("Episodes")
    plt.ylabel("Duration")
    plt.legend(loc="lower right")
    plt.title(params.env_name)
    plt.savefig(save_dir + "/../results/durations_" + make_full_string(params) + "pg.pdf")
    plt.show()
def exploit_policy_loss_full(params) -> None:
    """
    Plot the saved policy-loss curve of every studied gradient and show it.

    For each entry g of params.gradients, plot_data is called on
    <cwd>/data/save/policy_loss_<g>_<env_name>.txt; the figure is then saved
    under <cwd>/data/save/../results with a name ending in 'pg.pdf' and shown.

    :param params: experiment parameters; gradients and env_name are read
        directly and the whole object is handed to make_full_string
    :return: nothing
    """
    save_dir = os.getcwd() + "/data/save"

    for gradient in params.gradients:
        data_file = save_dir + "/policy_loss_" + gradient + "_" + params.env_name + ".txt"
        plot_data(data_file, "policy loss " + gradient)

    plt.xlabel("Cycles")
    plt.ylabel("Loss")
    plt.legend(loc="lower right")
    plt.title(params.env_name)
    plt.savefig(save_dir + "/../results/policy_loss_" + make_full_string(params) + "pg.pdf")
    plt.show()
def exploit_angles_global_full(params) -> None:
    """
    Plot the saved angle curve of every studied gradient and store the figure.

    For each entry g of params.gradients, plot_data is called on
    <cwd>/data/save/angle_<study_name><g>_<env_name>.txt, always with the
    label "scalar product ". The figure is saved as a PDF under
    <cwd>/data/save/../results and the current figure is then cleared (it is
    not shown).

    :param params: experiment parameters; gradients, study_name and env_name
        are read directly and the whole object is handed to make_full_string
    :return: nothing
    """
    save_dir = os.getcwd() + "/data/save"

    for gradient in params.gradients:
        data_file = save_dir + "/angle_" + params.study_name + gradient + "_" + params.env_name + ".txt"
        plot_data(data_file, "scalar product ")

    plt.title(params.env_name)
    plt.xlabel("Episodes")
    plt.ylabel("Scalar Product")
    plt.legend(loc="best")
    plt.savefig(save_dir + "/../results/angles_" + make_full_string(params) + ".pdf")
    plt.clf()
def exploit_distance_solo(params) -> None:
    """
    Plot one curve per studied gradient under the label "distance".

    For each entry g of params.gradients, plot_data is called on
    <cwd>/data/save/reward_<study_name><g>_<env_name>.txt. The figure is
    saved as a PDF under <cwd>/data/save/../results and the current figure is
    then cleared (it is not shown).

    NOTE(review): the curves are read from 'reward_' files although they are
    labelled "distance" — confirm this is the intended input.

    :param params: experiment parameters; gradients, study_name and env_name
        are read directly and the whole object is handed to make_full_string
    :return: nothing
    """
    save_dir = os.getcwd() + "/data/save"

    for gradient in params.gradients:
        data_file = save_dir + "/reward_" + params.study_name + gradient + "_" + params.env_name + ".txt"
        plot_data(data_file, "distance")

    plt.title(params.env_name)
    plt.xlabel("Episodes")
    plt.ylabel("distance")
    plt.legend(loc="lower right")
    plt.savefig(save_dir + "/../results/distances_" + make_full_string(params) + ".pdf")
    plt.clf()
def exploit_angles_global_full_comparison(params) -> None:
    """
    Plot the saved 'pg' and 'cem' angle curves side by side.

    For each entry g of params.gradients, plot_data is called first on
    <cwd>/data/save/angle_pg<g>_<env_name>.txt and then on
    <cwd>/data/save/angle_cem<g>_<env_name>.txt. The figure is saved as a PDF
    under <cwd>/data/save/../results and the current figure is then cleared
    (it is not shown).

    :param params: experiment parameters; gradients and env_name are read
        directly and the whole object is handed to make_full_string
    :return: nothing
    """
    save_dir = os.getcwd() + "/data/save"

    for gradient in params.gradients:
        pg_file = save_dir + "/angle_" + "pg" + gradient + "_" + params.env_name + ".txt"
        plot_data(pg_file, "scalar product pg")
        cem_file = save_dir + "/angle_" + "cem" + gradient + "_" + params.env_name + ".txt"
        plot_data(cem_file, "scalar product cem")

    plt.title(params.env_name)
    plt.xlabel("Episodes")
    plt.ylabel("scalar product")
    plt.legend(loc="lower right")
    plt.savefig(save_dir + "/../results/angles_" + make_full_string(params) + ".pdf")
    plt.clf()
def exploit_critic_loss_full(params) -> None:
    """
    Plot the saved critic-loss curve of every studied gradient and show it.

    For each entry g of params.gradients, plot_data is called on
    <cwd>/data/save/critic_loss_<g>_<env_name>.txt. The x ticks are placed at
    0 .. nb_cycles - 2 and labelled with the values of
    arange(1, nb_cycles). The figure is then saved under
    <cwd>/data/save/../results with a name ending in 'pg.pdf' and shown.

    :param params: experiment parameters; gradients, nb_cycles and env_name
        are read directly and the whole object is handed to make_full_string
    :return: nothing
    """
    save_dir = os.getcwd() + "/data/save"

    for gradient in params.gradients:
        data_file = save_dir + "/critic_loss_" + gradient + "_" + params.env_name + ".txt"
        plot_data(data_file, "critic loss " + gradient)

    # Relabel the tick positions so that the axis starts counting at 1.
    tick_positions = range(params.nb_cycles - 1)
    tick_labels = arange(1, params.nb_cycles)
    plt.xticks(tick_positions, labels=tick_labels)

    plt.xlabel("Cycles")
    plt.ylabel("Loss")
    plt.legend(loc="upper right")
    plt.title(params.env_name)
    plt.savefig(save_dir + "/../results/critic_loss_" + make_full_string(params) + "pg.pdf")
    plt.show()
def exploit_reward_full(params) -> None:
    """
    Plot the saved reward curve of every studied gradient and show the figure.

    For each entry g of params.gradients, plot_data is called on
    <cwd>/data/save/reward_<experiment><g>_<env_name>.txt with the label
    "reward <experiment>". When params.eval_freq differs from 1, the x ticks
    are relabelled with the values of arange(0, nb_cycles, eval_freq)
    converted to strings. The figure is then saved as a PDF under
    <cwd>/data/save/../results and shown.

    :param params: experiment parameters; gradients, experiment, eval_freq,
        nb_cycles and env_name are read directly and the whole object is
        handed to make_full_string
    :return: nothing
    """
    save_dir = os.getcwd() + "/data/save"

    for gradient in params.gradients:
        data_file = save_dir + "/reward_" + params.experiment + gradient + "_" + params.env_name + ".txt"
        plot_data(data_file, "reward " + params.experiment)

    if params.eval_freq != 1:
        # One tick per stored point, labelled with the value it stands for.
        tick_positions = range((params.nb_cycles - 1) // params.eval_freq + 1)
        tick_labels = arange(0, params.nb_cycles, params.eval_freq).astype(str)
        plt.xticks(tick_positions, labels=tick_labels)

    plt.title(params.env_name)
    plt.xlabel("Episodes")
    plt.ylabel("Reward")
    plt.legend(loc="best")
    plt.savefig(save_dir + "/../results/rewards_" + make_full_string(params) + ".pdf")
    plt.show()
def exploit_angles_full(params) -> None:
    """
    Produce one gradient-angle figure per cycle and store each as a PDF.

    For every cycle in range(params.nb_cycles), plot_data is called on
    <cwd>/data/save/gradient_angles_#<cycle>.txt, the figure is saved under
    <cwd>/data/save/../results/gradient_angles/ and the current figure is
    cleared. That output sub-directory is not created here.

    NOTE(review): the whole per-cycle pass is repeated once per entry of
    params.gradients although nothing inside depends on the entry, so the
    same files are regenerated each time — confirm this is intended.

    :param params: experiment parameters; gradients, nb_cycles, study_name
        and env_name are read directly and the whole object is handed to
        make_full_string
    :return: nothing
    """
    save_dir = os.getcwd() + "/data/save"
    study = params.gradients

    for _ in range(len(study)):
        for cycle in range(params.nb_cycles):
            data_file = save_dir + "/gradient_angles_" + "#" + str(cycle) + ".txt"
            plot_data(data_file, params.study_name + "_" + "gradient_angles ")

            plt.title(params.env_name)
            plt.xlabel("traj_in_batch")
            plt.ylabel("angle")
            plt.legend(loc="best")
            plt.savefig(save_dir + "/../results/gradient_angles/gradient_angles_" + "#" + str(cycle)
                        + make_full_string(params) + ".pdf")
            plt.clf()