Example #1
def load_policies(self, folder) -> None:
    """
    Evaluate every policy file of the given folder and store the results
    :param folder: name of the folder containing policies
    :return: nothing (environments are stored in self.env_dict, scores in self.score_dict)
    """
    listdir = os.listdir(folder)
    for policy_file in listdir:
        print(policy_file)
        pw = PolicyWrapper(GenericNet(), "", "", "", 0)
        policy = pw.load(folder + policy_file)
        env = make_env(pw.env_name, pw.policy_type, pw.max_steps)
        env.set_reward_flag(False)
        env.set_duration_flag(False)
        if pw.env_name not in self.env_dict:
            # first policy seen for this environment: register the environment
            self.env_dict[pw.env_name] = env
            self.score_dict[pw.env_name] = {}
        scores = evaluate_pol(env, policy, False)
        # scores are keyed by their mean; the value keeps the team name and the std
        self.score_dict[pw.env_name][scores.mean()] = [pw.team_name, scores.std()]
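A minimal sketch (the helper name and the print format are assumptions, not part of the original code) of how the score_dict built above, laid out as {env_name: {mean_score: [team_name, std]}}, could be printed as a per-environment ranking:

def print_leaderboard(score_dict) -> None:
    # assumed layout: {env_name: {mean_score: [team_name, std_score]}}
    for env_name, per_env in score_dict.items():
        print(env_name)
        for mean in sorted(per_env, reverse=True):
            team_name, std = per_env[mean]
            print("  {}: {:.2f} +/- {:.2f}".format(team_name, mean, std))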
Example #2
def load_policies(folder):
    """
    Load the policies from the given folder, sort them and assign a color to each method
    :param folder: name of the folder containing policies
    :return: the sorted list of policy weights, the list of colors, and the policy type,
             environment name and maximum episode steps read from the last file name
    """
    listdir = os.listdir(folder)
    policies = []
    # file names are '#'-separated fields; sort on the 4th field
    listdir.sort(key=lambda x: x.split('#')[3])
    colors = []
    print("\nPolicies loaded:")
    for policy_file in listdir:
        if policy_file.split('#')[1] == 'PG':
            colors.append("#ff7f0e")
        if policy_file.split('#')[1] == 'CEM':
            colors.append("#d62728")
        pw = PolicyWrapper(GenericNet(), 0, "", "", "", 0)
        policy, _ = pw.load(folder + policy_file)
        policy = policy.get_weights()
        policies.append(policy)
    print("\n")
    # the metadata below is taken from the last file name processed in the loop
    env = (policy_file.split('#')[0]).split('/')[-1]
    policy = policy_file.split('#')[5]
    max_episode_steps = policy_file.split('#')[6]
    return policies, colors[1:], policy, env, max_episode_steps
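A minimal usage sketch, assuming a Models/ folder laid out as the '#'-separated file names above expect (the folder path is borrowed from the __main__ block of Example #10):

import os

folder = os.getcwd() + '/Models/'
policies, colors, policy_type, env_name, max_episode_steps = load_policies(folder)
print(len(policies), "policies loaded for", env_name,
      "with policy type", policy_type, "and", max_episode_steps, "max steps")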
Example #3
def load_policies(folder):
    """
    Load all the policies stored in the given folder
    :param folder: name of the folder containing policies
    :return: the list of weight vectors of the loaded policies
    """
    listdir = os.listdir(folder)
    policies = []
    for policy_file in listdir:
        pw = PolicyWrapper(GenericNet(), 0, "", "", "", 0)
        policy, _ = pw.load(folder + policy_file)
        policy = policy.get_weights()
        policies.append(policy)
    return policies
Example #4
def study_cem(params, starting_pol=None) -> None:
    """
    Start a study of CEM algorithms
    :param params: the parameters of the study
    :param starting_pol: optional list of initial policy weights, one entry per repetition
    :return: nothing
    """
    assert params.policy_type in ['squashedGaussian', 'normal',
                                  'beta'], 'unsupported policy type'
    chrono = Chrono()
    # cuda = torch.device('cuda')
    study = params.gradients
    if params.nb_trajs_cem is not None:
        params.nb_trajs = params.nb_trajs_cem
    simu = make_simu_from_params(params)
    for i in range(1):  # only the first gradient variant (e.g. 'sum') is run; use range(len(study)) to run them all
        simu.env.set_file_name('cem' + study[i] + '_' + simu.env_name)
        print("study : ", study[i])
        for j in range(params.nb_repet):
            simu.env.reinit()
            if params.policy_type == "squashedGaussian":
                policy = SquashedGaussianPolicy(simu.obs_size, 32, 64, 1)
            elif params.policy_type == "normal":
                policy = NormalPolicy(simu.obs_size, 32, 64, 1)
            elif params.policy_type == "beta":
                policy = BetaPolicy(simu.obs_size, 32, 64, 1)
            if starting_pol is not None:
                policy.set_weights(starting_pol[j])
            pw = PolicyWrapper(policy, j, params.policy_type, simu.env_name,
                               params.team_name, params.max_episode_steps)
            # plot_policy(policy, simu.env, True, simu.env_name, study[i], '_ante_', j, plot=False)
            simu.train_cem(pw, params, policy)
            # plot_policy(policy, simu.env, True, simu.env_name, study[i], '_post_', j, plot=False)
    chrono.stop()
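A minimal invocation sketch, assuming an argparse-style get_args() as used in the __main__ block of Example #10, and assuming that starting_pol, when given, is a list of weight vectors with one entry per repetition:

if __name__ == '__main__':
    params = get_args()
    params.policy_type = 'normal'
    study_cem(params)  # start every repetition from a randomly initialised policy
    # study_cem(params, starting_pol=stored_weights)  # or warm-start each repetition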
Example #5
def study_cem(params):
    """
    Start a CEM study
    :param params: the parameters of the study
    :return: the weights, rewards, populations, population scores and is_kept flags
             of the last repetition
    """
    assert params.policy_type in ['normal'], 'unsupported policy type'
    # cuda = torch.device('cuda')
    study = params.gradients
    simu = make_simu_from_params(params)
    simu.env.set_file_name(study[0] + '_' + simu.env_name)
    reward_file = None
    print("study : ", study)

    # do not freeze any layer
    params.fix_layers = False

    print("cem study")
    chrono_cem = Chrono()
    for j in range(params.nb_repet):
        simu.env.reinit()
        if params.policy_type == "normal":
            policy = NormalPolicy(simu.obs_size, 24, 36, 1)
        pw = PolicyWrapper(policy, params.policy_type, simu.env_name, j,
                           params.team_name, params.max_episode_steps)
        # only the results of the last repetition are kept and returned
        all_weights, all_rewards, all_pops, all_pops_scores, is_kept = simu.train(
            pw, params, policy, False, reward_file, "", study[0], 0, True)
    cem_time = chrono_cem.stop()
    return all_weights, all_rewards, all_pops, all_pops_scores, is_kept
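A hedged sketch of consuming the returned tuple; matplotlib and the assumption that all_rewards is a per-iteration sequence of rewards are not confirmed by the original code:

import matplotlib.pyplot as plt

params = get_args()
all_weights, all_rewards, all_pops, all_pops_scores, is_kept = study_cem(params)
plt.plot(all_rewards)
plt.xlabel('CEM iteration')
plt.ylabel('reward')
plt.savefig('cem_rewards.pdf')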
Example #6
def plot_critic_from_name(folder, file_name, policy) -> None:
    """
    Plot a critic loaded from a file in the given directory.
    A policy is needed so that Q(s, a) critics can be plotted, using this policy to choose the action a
    :param folder: the given directory
    :param file_name: the name of the critic file
    :param policy: the given policy
    :return: nothing
    """
    complete_name = folder + file_name
    pw = PolicyWrapper(GenericNet(), "", "")
    critic = pw.load(complete_name)
    env_name = pw.env_name
    env, discrete = make_env(env_name, ["x", "y"])
    obs_size = env.observation_space.shape[0]
    picture_name = file_name + '_portrait.pdf'
    if not discrete:
        if obs_size == 1:
            plot_qfunction_1D(critic,
                              env,
                              plot=False,
                              save_figure=True,
                              figname=picture_name,
                              foldername='/critics/')
        else:
            plot_qfunction_ND(critic,
                              policy,
                              env,
                              plot=False,
                              save_figure=True,
                              figname=picture_name,
                              foldername='/critics/')
    else:
        if obs_size == 2:
            plot_vfunction_2D(critic,
                              env,
                              plot=False,
                              save_figure=True,
                              figname=picture_name,
                              foldername='/critics/')
        else:
            plot_vfunction_ND(critic,
                              env,
                              plot=False,
                              save_figure=True,
                              figname=picture_name,
                              foldername='/critics/')
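A minimal call sketch, assuming the critic file was saved with the 'env#team#study+repetition.pt' pattern used by critic.save_model() in the studies below; the file name, observation size and learning rate are illustrative only:

folder = 'data/critics/'
file_name = 'CartPole-v0#myTeam#sum0.pt'  # hypothetical name following the save pattern
policy = NormalPolicy(4, 24, 36, 1, 0.01)  # obs_size=4 for CartPole; learning rate assumed
plot_critic_from_name(folder, file_name, policy)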
Example #7
def study_regress(params) -> None:
    """
    Start a study in which the policy is first fitted by regression, then trained
    with the given policy gradient variants
    :param params: the parameters of the study
    :return: nothing
    """
    assert params.policy_type in ['bernoulli', 'normal', 'squashedGaussian'
                                  ], 'unsupported policy type'
    chrono = Chrono()
    study = params.gradients
    simu = make_simu_from_params(params)
    for i in range(len(study)):
        simu.env.set_file_name(study[i] + '_' + simu.env_name)
        policy_loss_file, critic_loss_file = set_files(study[i], simu.env_name)
        print("study : ", study[i])
        for j in range(params.nb_repet):
            simu.env.reinit()
            if params.policy_type == "bernoulli":
                policy = BernoulliPolicy(simu.obs_size, 24, 36, 1,
                                         params.lr_actor)
            elif params.policy_type == "normal":
                policy = NormalPolicy(simu.obs_size, 24, 36, 1,
                                      params.lr_actor)
            elif params.policy_type == "squashedGaussian":
                policy = SquashedGaussianPolicy(simu.obs_size, 24, 36, 1,
                                                params.lr_actor)
            pw = PolicyWrapper(policy, params.policy_type, simu.env_name,
                               params.team_name, params.max_episode_steps)
            plot_policy(policy,
                        simu.env,
                        True,
                        simu.env_name,
                        study[i],
                        '_ante_',
                        j,
                        plot=False)

            if not simu.discrete:
                act_size = simu.env.action_space.shape[0]
                critic = QNetworkContinuous(simu.obs_size + act_size, 24, 36,
                                            1, params.lr_critic)
            else:
                critic = VNetwork(simu.obs_size, 24, 36, 1, params.lr_critic)
            # plot_critic(simu, critic, policy, study[i], '_ante_', j)

            regress(simu, policy, params.policy_type, 250, params.render)
            simu.train(pw, params, policy, critic, policy_loss_file,
                       critic_loss_file, study[i])
            plot_policy(policy,
                        simu.env,
                        True,
                        simu.env_name,
                        study[i],
                        '_post_',
                        j,
                        plot=False)
            plot_critic(simu, critic, policy, study[i], '_post_', j)
            critic.save_model('data/critics/' + params.env_name + '#' +
                              params.team_name + '#' + study[i] + str(j) +
                              '.pt')
    chrono.stop()
Example #8
def study_beta(params):
    """
    Run the training for several values of the beta parameter
    :param params: the parameters of the study
    :return: nothing
    """
    simu = make_simu_from_params(params)
    for beta in [0.1, 0.5, 1.0, 5.0, 10.0]:
        print("beta:", beta)
        policy_loss_file, critic_loss_file = set_files(str(beta), simu.env_name)
        simu.env.set_file_name(str(beta) + '_' + simu.env_name)
        for i in range(params.nb_repet):
            simu.env.reinit()
            if params.policy_type == "bernoulli":
                policy = BernoulliPolicy(simu.obs_size, 24, 36, 1, params.lr_actor)
            elif params.policy_type == "normal":
                policy = NormalPolicy(simu.obs_size, 24, 36, 1, params.lr_actor)
            if not simu.discrete:
                act_size = simu.env.action_space.shape[0]
                critic = QNetworkContinuous(simu.obs_size + act_size, 24, 36, 1, params.lr_critic)
            else:
                critic = VNetwork(simu.obs_size, 24, 36, 1, params.lr_critic)
            pw = PolicyWrapper(policy, params.policy_type, simu.env_name, params.team_name, params.max_episode_steps)
            simu.train(pw, params, policy, critic, policy_loss_file, critic_loss_file, "beta", beta)
Example #9
def study_pg(params) -> None:
    """
    Start a study of the policy gradient algorithms
    :param params: the parameters of the study
    :return: nothing
    """
    #### MODIF : added discrete
    assert params.policy_type in [
        'bernoulli', 'normal', 'squashedGaussian', 'discrete'
    ], 'unsupported policy type'
    ####
    chrono = Chrono()
    # cuda = torch.device('cuda')
    study = params.gradients
    simu = make_simu_from_params(params)
    for i in range(len(study)):
        simu.env.set_file_name(study[i] + '_' + simu.env_name)
        policy_loss_file, critic_loss_file = set_files(study[i], simu.env_name)
        print("study : ", study[i])
        for j in range(params.nb_repet):
            simu.env.reinit()
            if params.policy_type == "bernoulli":
                policy = BernoulliPolicy(simu.obs_size, 100, 200, 1,
                                         params.lr_actor)
            #### MODIF : added the discrete policy
            elif params.policy_type == "discrete":
                if isinstance(simu.env.action_space, gym.spaces.box.Box):
                    # continuous (Box) action space: warn, and fall back to a number of
                    # actions derived from the bounds of the first action dimension
                    nb_actions = int(simu.env.action_space.high[0] -
                                     simu.env.action_space.low[0] + 1)
                    print(
                        "Error : environment action space is not discrete :" +
                        str(simu.env.action_space))
                else:
                    nb_actions = simu.env.action_space.n
                policy = DiscretePolicy(simu.obs_size, 24, 36, nb_actions,
                                        params.lr_actor)
            ####
            elif params.policy_type == "normal":
                policy = NormalPolicy(simu.obs_size, 100, 200, 1,
                                      params.lr_actor)
            elif params.policy_type == "squashedGaussian":
                policy = SquashedGaussianPolicy(simu.obs_size, 100, 200, 1,
                                                params.lr_actor)
            elif params.policy_type == "DDPG":
                policy = DDPG(simu.obs_size, 24, 36, 1, params.lr_actor)
            # policy = policy.cuda()
            pw = PolicyWrapper(policy, params.policy_type, simu.env_name,
                               params.team_name, params.max_episode_steps)
            plot_policy(policy,
                        simu.env,
                        True,
                        simu.env_name,
                        study[i],
                        '_ante_',
                        j,
                        plot=False)

            if not simu.discrete:
                act_size = simu.env.action_space.shape[0]
                critic = QNetworkContinuous(simu.obs_size + act_size, 24, 36,
                                            1, params.lr_critic)
            else:
                critic = VNetwork(simu.obs_size, 24, 36, 1, params.lr_critic)
            # plot_critic(simu, critic, policy, study[i], '_ante_', j)

            simu.train(pw, params, policy, critic, policy_loss_file,
                       critic_loss_file, study[i])
            plot_policy(policy,
                        simu.env,
                        True,
                        simu.env_name,
                        study[i],
                        '_post_',
                        j,
                        plot=False)
            if False:  # disabled by hand; set to True to plot the weight histograms
                if params.policy_type == "normal":
                    plot_normal_histograms(policy, j, simu.env_name)
                else:
                    plot_weight_histograms(policy, j, simu.env_name)
        # only the critic and policy from the last repetition are plotted and saved
        plot_critic(simu, critic, policy, study[i], '_post_', j)
        critic.save_model('data/critics/' + params.env_name + '#' +
                          params.team_name + '#' + study[i] + str(j) + '.pt')
    chrono.stop()
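A minimal invocation sketch for study_pg; get_args() is taken from the __main__ block of Example #10, and the gradient name in params.gradients is an assumption (only 'sum' is mentioned in the examples above):

if __name__ == '__main__':
    params = get_args()
    params.policy_type = 'normal'
    params.gradients = ['sum']  # assumed name of a gradient variant
    study_pg(params)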
Example #10
    """
    policy = SquashedGaussianPolicy(env.observation_space.shape[0], 24, 36, 1,
                                    params.lr_actor)
    policy.set_weights(weights)
    state = env.reset()
    env.render(mode='rgb_array')
    for i in range(1000):
        action = policy.select_action(state, deterministic=True)
        print(action)
        next_state, reward, done, _ = env.step(action)
        env.render(mode='rgb_array')
        state = next_state
    print('finished rendering')
    # print("team: ", policy.team_name, "mean: ", scores.mean(), "std:", scores.std())


if __name__ == '__main__':
    args = get_args()
    print(args)

    pw = PolicyWrapper(GenericNet(), 0, "", "", "", 0)

    env = make_env(args.env_name, args.policy_type, args.max_episode_steps)
    env = gym.wrappers.Monitor(env, './videos/PG_fin')

    directory = os.getcwd() + '/Models/'
    weights_vecs = load_policies(directory)
    for weights_vec in weights_vecs:
        render_pol(args, env, weights_vec)
    env.close()