Ejemplo n.º 1
0
def exploit_nstep(params) -> None:
    """Plot the n-step study curves, then a summary of their final values.

    For each metric, one call to ``plot_data`` is made per n-step value on
    the matching file under ``<cwd>/data/save``, and the resulting figure is
    saved under ``<cwd>/data/save/../results``. A second figure then plots
    the last mean and the last std returned by ``plot_data`` against the
    n-step values.

    :param params: run parameters; ``env_name`` is read here and the whole
        object is handed to ``make_full_string`` to build output file names
    """
    save_dir = f"{os.getcwd()}/data/save"
    n_values = [1, 5, 10, 15, 20]

    for metric in ('policy_loss', 'critic_loss', 'reward', 'duration'):
        final_means = []
        final_stds = []
        for n in n_values:
            tag = f"{metric}_nstep_{n}"
            # plot_data hands back a (mean, std) pair of indexable values;
            # only the last entry of each is kept for the summary figure.
            mean, std = plot_data(
                f"{save_dir}/{tag}_{params.env_name}.txt", tag)
            final_means.append(mean[-1])
            final_stds.append(std[-1])

        # First figure: the curves themselves.
        plt.title(params.env_name)
        plt.xlabel("Episodes")
        plt.ylabel(metric)
        plt.legend(loc="lower right")
        curves_pdf = (f"{save_dir}/../results/{metric}_nstep_"
                      f"{make_full_string(params)}.pdf")
        plt.savefig(curves_pdf)
        plt.show()

        # Second figure: final mean / final std as a function of N.
        plt.plot(n_values, final_means, label="bias")
        plt.plot(n_values, final_stds, label="variance")
        plt.title(params.env_name)
        plt.xlabel("N in N-step")
        plt.ylabel('variance, bias')
        plt.legend(loc="lower right")
        summary_pdf = (f"{save_dir}/../results/bias_variance_{metric}_"
                       f"{make_full_string(params)}.pdf")
        plt.savefig(summary_pdf)
        plt.show()
Ejemplo n.º 2
0
def exploit_nstep_diff(params) -> None:
    """Plot the "diff" curves of the n-step study and their final values.

    One call to ``plot_data`` is made per n-step value on the matching
    ``diff_<n>_<env>.txt`` file under ``<cwd>/data/save``; the figure is
    saved under ``<cwd>/data/save/../results``. A second figure plots the
    last mean and last std returned by ``plot_data`` against the n-step
    values.

    :param params: run parameters; ``env_name`` is read here and the whole
        object is handed to ``make_full_string`` to build output file names
    """
    save_dir = f"{os.getcwd()}/data/save"
    n_values = [1, 5, 10, 20]
    final_means = []
    final_stds = []
    for n in n_values:
        # Keep only the last entry of the returned mean and std.
        mean, std = plot_data(
            f"{save_dir}/diff_{n}_{params.env_name}.txt", f"nstep_{n}")
        final_means.append(mean[-1])
        final_stds.append(std[-1])

    # First figure: the diff curves.
    plt.title(params.env_name)
    plt.xlabel("Episodes")
    plt.ylabel("diff")
    plt.legend(loc="lower right")
    curves_pdf = (f"{save_dir}/../results/diff_nstep_"
                  f"{make_full_string(params)}.pdf")
    plt.savefig(curves_pdf)
    plt.show()

    # Second figure: final mean / final std as a function of N.
    plt.plot(n_values, final_means, label="bias")
    plt.plot(n_values, final_stds, label="variance")
    plt.title(params.env_name)
    plt.xlabel("N in N-step")
    plt.ylabel('variance, bias')
    plt.legend(loc="lower right")
    summary_pdf = (f"{save_dir}/../results/bias_variance_"
                   f"{make_full_string(params)}.pdf")
    plt.savefig(summary_pdf)
    plt.show()
Ejemplo n.º 3
0
def plot_beta_results(params) -> None:
    """Plot the reward files of the beta study on one figure and save it.

    ``plot_data`` is called once per beta value on
    ``reward_<beta>_<env>.txt`` under ``<cwd>/data/save``; the figure is
    saved under ``<cwd>/data/save/../results`` and then shown.

    :param params: run parameters; ``env_name`` is read here and the whole
        object is handed to ``make_full_string`` to build the output name
    """
    save_dir = f"{os.getcwd()}/data/save"
    for beta in (0.1, 0.5, 1.0, 5.0, 10.0):
        reward_file = f"{save_dir}/reward_{beta}_{params.env_name}.txt"
        plot_data(reward_file, f"reward {beta}")

    plt.title(params.env_name)
    plt.xlabel("Episodes")
    plt.ylabel("Reward")
    plt.legend(loc="lower right")
    plt.savefig(f"{save_dir}/../results/rewards_{make_full_string(params)}.pdf")
    plt.show()
Ejemplo n.º 4
0
def exploit_cov_full(params) -> None:
    """Plot the covariance file and save the figure, without showing it.

    ``plot_data`` is called on ``<cwd>/data/save/covariance_.txt`` once per
    entry of ``params.gradients``; the figure is saved under
    ``<cwd>/data/save/../results`` and the current figure is then cleared.

    :param params: run parameters; ``gradients`` and ``env_name`` are read
        here and the whole object is handed to ``make_full_string``
    """
    save_dir = f"{os.getcwd()}/data/save"
    covariance_file = f"{save_dir}/covariance_.txt"
    # NOTE(review): the gradient entry itself is never used, so the same file
    # is plotted with the same label on every pass - confirm this is intended.
    for _ in params.gradients:
        plot_data(covariance_file, "cem covariance norm ")

    plt.title(params.env_name)
    plt.xlabel("Episodes")
    plt.ylabel("Covariance Norm")
    plt.legend(loc="best")
    plt.savefig(
        f"{save_dir}/../results/covariance_{make_full_string(params)}.pdf")
    plt.clf()
Ejemplo n.º 5
0
def exploit_reward_full(params) -> None:
    """Plot one reward file per gradient on a shared figure and save it.

    For every entry of ``params.gradients``, ``plot_data`` is called on
    ``reward_<study_name><gradient>_<env>.txt`` under ``<cwd>/data/save``.
    The figure is saved under ``<cwd>/data/save/../results`` and then shown.

    :param params: run parameters; ``gradients``, ``study_name`` and
        ``env_name`` are read here and the whole object is handed to
        ``make_full_string``
    """
    save_dir = f"{os.getcwd()}/data/save"
    for gradient in params.gradients:
        reward_file = (f"{save_dir}/reward_{params.study_name}{gradient}_"
                       f"{params.env_name}.txt")
        plot_data(reward_file, f"{params.study_name}_reward {gradient}")

    plt.title(params.env_name)
    plt.xlabel("Episodes")
    plt.ylabel("Reward")
    plt.legend(loc="best")
    plt.savefig(f"{save_dir}/../results/rewards_{make_full_string(params)}.pdf")
    plt.show()
Ejemplo n.º 6
0
def exploit_duration_full(params) -> None:
    """Plot one duration file per gradient on a shared figure and save it.

    For every entry of ``params.gradients``, ``plot_data`` is called on
    ``duration_<gradient>_<env>.txt`` under ``<cwd>/data/save``. The figure
    is saved under ``<cwd>/data/save/../results`` and then shown.

    :param params: run parameters; ``gradients`` and ``env_name`` are read
        here and the whole object is handed to ``make_full_string``
    """
    save_dir = f"{os.getcwd()}/data/save"
    for gradient in params.gradients:
        duration_file = f"{save_dir}/duration_{gradient}_{params.env_name}.txt"
        plot_data(duration_file, f"duration {gradient}")

    plt.xlabel("Episodes")
    plt.ylabel("Duration")
    plt.legend(loc="lower right")
    plt.title(params.env_name)
    # The output name ends in "pg.pdf", appended directly to the run string.
    plt.savefig(
        f"{save_dir}/../results/durations_{make_full_string(params)}pg.pdf")
    plt.show()
Ejemplo n.º 7
0
def exploit_policy_loss_full(params) -> None:
    """Plot one policy-loss file per gradient on a shared figure and save it.

    For every entry of ``params.gradients``, ``plot_data`` is called on
    ``policy_loss_<gradient>_<env>.txt`` under ``<cwd>/data/save``. The
    figure is saved under ``<cwd>/data/save/../results`` and then shown.

    :param params: run parameters; ``gradients`` and ``env_name`` are read
        here and the whole object is handed to ``make_full_string``
    """
    save_dir = f"{os.getcwd()}/data/save"
    for gradient in params.gradients:
        loss_file = f"{save_dir}/policy_loss_{gradient}_{params.env_name}.txt"
        plot_data(loss_file, f"policy loss {gradient}")

    plt.xlabel("Cycles")
    plt.ylabel("Loss")
    plt.legend(loc="lower right")
    plt.title(params.env_name)
    # The output name ends in "pg.pdf", appended directly to the run string.
    plt.savefig(
        f"{save_dir}/../results/policy_loss_{make_full_string(params)}pg.pdf")
    plt.show()
Ejemplo n.º 8
0
def exploit_angles_global_full(params) -> None:
    """Plot one angle file per gradient and save the figure without showing it.

    For every entry of ``params.gradients``, ``plot_data`` is called on
    ``angle_<study_name><gradient>_<env>.txt`` under ``<cwd>/data/save``,
    always with the same label. The figure is saved under
    ``<cwd>/data/save/../results`` and the current figure is then cleared.

    :param params: run parameters; ``gradients``, ``study_name`` and
        ``env_name`` are read here and the whole object is handed to
        ``make_full_string``
    """
    save_dir = f"{os.getcwd()}/data/save"
    for gradient in params.gradients:
        angle_file = (f"{save_dir}/angle_{params.study_name}{gradient}_"
                      f"{params.env_name}.txt")
        plot_data(angle_file, "scalar product ")

    plt.title(params.env_name)
    plt.xlabel("Episodes")
    plt.ylabel("Scalar Product")
    plt.legend(loc="best")
    plt.savefig(f"{save_dir}/../results/angles_{make_full_string(params)}.pdf")
    plt.clf()
Ejemplo n.º 9
0
def exploit_distance_solo(params) -> None:
    """Plot the per-gradient reward files under a "distance" label and save.

    For every entry of ``params.gradients``, ``plot_data`` is called on
    ``reward_<study_name><gradient>_<env>.txt`` under ``<cwd>/data/save``
    with the label ``"distance"``. The figure is saved under
    ``<cwd>/data/save/../results`` and the current figure is then cleared.

    :param params: run parameters; ``gradients``, ``study_name`` and
        ``env_name`` are read here and the whole object is handed to
        ``make_full_string``
    """
    save_dir = f"{os.getcwd()}/data/save"
    for gradient in params.gradients:
        # The data comes from the "reward_" files even though the curve is
        # labelled as a distance.
        source_file = (f"{save_dir}/reward_{params.study_name}{gradient}_"
                       f"{params.env_name}.txt")
        plot_data(source_file, "distance")

    plt.title(params.env_name)
    plt.xlabel("Episodes")
    plt.ylabel("distance")
    plt.legend(loc="lower right")
    plt.savefig(
        f"{save_dir}/../results/distances_{make_full_string(params)}.pdf")
    plt.clf()
Ejemplo n.º 10
0
def exploit_angles_global_full_comparison(params) -> None:
    """Plot the "pg" and "cem" angle files per gradient on one figure.

    For every entry of ``params.gradients``, ``plot_data`` is called first on
    ``angle_pg<gradient>_<env>.txt`` and then on
    ``angle_cem<gradient>_<env>.txt`` under ``<cwd>/data/save``. The figure
    is saved under ``<cwd>/data/save/../results`` and the current figure is
    then cleared.

    :param params: run parameters; ``gradients`` and ``env_name`` are read
        here and the whole object is handed to ``make_full_string``
    """
    save_dir = f"{os.getcwd()}/data/save"
    for gradient in params.gradients:
        pg_file = f"{save_dir}/angle_pg{gradient}_{params.env_name}.txt"
        plot_data(pg_file, "scalar product pg")
        cem_file = f"{save_dir}/angle_cem{gradient}_{params.env_name}.txt"
        plot_data(cem_file, "scalar product cem")

    plt.title(params.env_name)
    plt.xlabel("Episodes")
    plt.ylabel("scalar product")
    plt.legend(loc="lower right")
    plt.savefig(f"{save_dir}/../results/angles_{make_full_string(params)}.pdf")
    plt.clf()
Ejemplo n.º 11
0
def exploit_critic_loss_full(params) -> None:
    """Plot one critic-loss file per gradient on a shared figure and save it.

    For every entry of ``params.gradients``, ``plot_data`` is called on
    ``critic_loss_<gradient>_<env>.txt`` under ``<cwd>/data/save``. The x
    ticks at positions ``0 .. nb_cycles - 2`` are labelled
    ``1 .. nb_cycles - 1``. The figure is saved under
    ``<cwd>/data/save/../results`` and then shown.

    :param params: run parameters; ``gradients``, ``env_name`` and
        ``nb_cycles`` are read here and the whole object is handed to
        ``make_full_string``
    """
    save_dir = f"{os.getcwd()}/data/save"
    for gradient in params.gradients:
        loss_file = f"{save_dir}/critic_loss_{gradient}_{params.env_name}.txt"
        plot_data(loss_file, f"critic loss {gradient}")

    # Tick positions start at 0 while the displayed labels start at 1.
    tick_positions = range(params.nb_cycles - 1)
    tick_labels = arange(1, params.nb_cycles)
    plt.xticks(tick_positions, labels=tick_labels)
    plt.xlabel("Cycles")
    plt.ylabel("Loss")
    plt.legend(loc="upper right")
    plt.title(params.env_name)
    # The output name ends in "pg.pdf", appended directly to the run string.
    plt.savefig(
        f"{save_dir}/../results/critic_loss_{make_full_string(params)}pg.pdf")
    plt.show()
Ejemplo n.º 12
0
def exploit_reward_full(params) -> None:
    """Plot one reward file per gradient on a shared figure and save it.

    For every entry of ``params.gradients``, ``plot_data`` is called on
    ``reward_<experiment><gradient>_<env>.txt`` under ``<cwd>/data/save``.
    When ``params.eval_freq`` is not 1, the x ticks are relabelled with the
    values ``arange(0, nb_cycles, eval_freq)``. The figure is saved under
    ``<cwd>/data/save/../results`` and then shown.

    :param params: run parameters; ``gradients``, ``experiment``,
        ``env_name``, ``eval_freq`` and ``nb_cycles`` are read here and the
        whole object is handed to ``make_full_string``
    """
    save_dir = f"{os.getcwd()}/data/save"
    for gradient in params.gradients:
        reward_file = (f"{save_dir}/reward_{params.experiment}{gradient}_"
                       f"{params.env_name}.txt")
        plot_data(reward_file, f"reward {params.experiment}")

    if params.eval_freq != 1:
        # Number of tick positions matches the number of generated labels.
        tick_count = (params.nb_cycles - 1) // params.eval_freq + 1
        tick_labels = arange(0, params.nb_cycles,
                             params.eval_freq).astype(str)
        plt.xticks(range(tick_count), labels=tick_labels)

    plt.title(params.env_name)
    plt.xlabel("Episodes")
    plt.ylabel("Reward")
    plt.legend(loc="best")
    plt.savefig(f"{save_dir}/../results/rewards_{make_full_string(params)}.pdf")
    plt.show()
Ejemplo n.º 13
0
def exploit_angles_full(params) -> None:
    """Save one gradient-angle figure per cycle.

    For each cycle in ``range(params.nb_cycles)``, ``plot_data`` is called on
    ``<cwd>/data/save/gradient_angles_#<cycle>.txt``, the figure is saved
    under ``<cwd>/data/save/../results/gradient_angles`` and the current
    figure is cleared. The whole sweep is repeated once per entry of
    ``params.gradients``.

    :param params: run parameters; ``gradients``, ``nb_cycles``,
        ``study_name`` and ``env_name`` are read here and the whole object is
        handed to ``make_full_string``
    """
    save_dir = f"{os.getcwd()}/data/save"
    # NOTE(review): the gradient entry is never used, so every pass of the
    # outer loop rewrites the same per-cycle files - confirm this is intended.
    for _ in params.gradients:
        for cycle in range(params.nb_cycles):
            angles_file = f"{save_dir}/gradient_angles_#{cycle}.txt"
            plot_data(angles_file, f"{params.study_name}_gradient_angles ")

            plt.title(params.env_name)
            plt.xlabel("traj_in_batch")
            plt.ylabel("angle")
            plt.legend(loc="best")
            figure_pdf = (f"{save_dir}/../results/gradient_angles/"
                          f"gradient_angles_#{cycle}"
                          f"{make_full_string(params)}.pdf")
            plt.savefig(figure_pdf)
            plt.clf()