import gym

# Note: these study functions come from a larger project; they assume the
# project's own helpers (Chrono, make_simu_from_params, set_files, the policy
# and critic classes, regress and the plotting utilities) are imported at
# module level.


def study_regress(params) -> None:
    """
    Start a study of the policy gradient algorithms, preceded by a regression
    step on the policy
    :param params: the parameters of the study
    :return: nothing
    """
    assert params.policy_type in [
        'bernoulli', 'normal', 'squashedGaussian'
    ], 'unsupported policy type'
    chrono = Chrono()
    study = params.gradients
    simu = make_simu_from_params(params)
    for i in range(len(study)):
        simu.env.set_file_name(study[i] + '_' + simu.env_name)
        policy_loss_file, critic_loss_file = set_files(study[i], simu.env_name)
        print("study : ", study[i])
        for j in range(params.nb_repet):
            simu.env.reinit()
            if params.policy_type == "bernoulli":
                policy = BernoulliPolicy(simu.obs_size, 24, 36, 1,
                                         params.lr_actor)
            elif params.policy_type == "normal":
                policy = NormalPolicy(simu.obs_size, 24, 36, 1,
                                      params.lr_actor)
            elif params.policy_type == "squashedGaussian":
                policy = SquashedGaussianPolicy(simu.obs_size, 24, 36, 1,
                                                params.lr_actor)
            pw = PolicyWrapper(policy, params.policy_type, simu.env_name,
                               params.team_name, params.max_episode_steps)
            plot_policy(policy,
                        simu.env,
                        True,
                        simu.env_name,
                        study[i],
                        '_ante_',
                        j,
                        plot=False)

            if not simu.discrete:
                act_size = simu.env.action_space.shape[0]
                critic = QNetworkContinuous(simu.obs_size + act_size, 24, 36,
                                            1, params.lr_critic)
            else:
                critic = VNetwork(simu.obs_size, 24, 36, 1, params.lr_critic)
            # plot_critic(simu, critic, policy, study[i], '_ante_', j)

            regress(simu, policy, params.policy_type, 250, params.render)
            simu.train(pw, params, policy, critic, policy_loss_file,
                       critic_loss_file, study[i])
            plot_policy(policy,
                        simu.env,
                        True,
                        simu.env_name,
                        study[i],
                        '_post_',
                        j,
                        plot=False)
            plot_critic(simu, critic, policy, study[i], '_post_', j)
            critic.save_model('data/critics/' + params.env_name + '#' +
                              params.team_name + '#' + study[i] + str(j) +
                              '.pt')
    chrono.stop()
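

# A minimal usage sketch (all values below are hypothetical, not from the
# original project): study_regress only reads a handful of fields from params,
# so a bare argparse.Namespace is enough to drive it.
def example_run_regress() -> None:
    from argparse import Namespace
    example_params = Namespace(
        policy_type='normal',           # must pass the assert above
        gradients=['sum', 'discount'],  # hypothetical gradient variants
        nb_repet=2,                     # repetitions per variant
        lr_actor=0.01,
        lr_critic=0.01,
        team_name='default_team',       # hypothetical team name
        max_episode_steps=200,
        render=False,
        env_name='Pendulum-v0',         # hypothetical environment id
    )
    study_regress(example_params)
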
def study_beta(params) -> None:
    """
    Start a study of the effect of the beta parameter
    :param params: the parameters of the study
    :return: nothing
    """
    assert params.policy_type in [
        'bernoulli', 'normal'
    ], 'unsupported policy type'
    simu = make_simu_from_params(params)
    for beta in [0.1, 0.5, 1.0, 5.0, 10.0]:
        print("beta:", beta)
        policy_loss_file, critic_loss_file = set_files(str(beta), simu.env_name)
        simu.env.set_file_name(str(beta) + '_' + simu.env_name)
        for i in range(params.nb_repet):
            simu.env.reinit()
            if params.policy_type == "bernoulli":
                policy = BernoulliPolicy(simu.obs_size, 24, 36, 1,
                                         params.lr_actor)
            elif params.policy_type == "normal":
                policy = NormalPolicy(simu.obs_size, 24, 36, 1,
                                      params.lr_actor)
            if not simu.discrete:
                act_size = simu.env.action_space.shape[0]
                critic = QNetworkContinuous(simu.obs_size + act_size, 24, 36,
                                            1, params.lr_critic)
            else:
                critic = VNetwork(simu.obs_size, 24, 36, 1, params.lr_critic)
            pw = PolicyWrapper(policy, params.policy_type, simu.env_name,
                               params.team_name, params.max_episode_steps)
            simu.train(pw, params, policy, critic, policy_loss_file,
                       critic_loss_file, "beta", beta)
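

# The study functions above repeat the same policy-construction branching; a
# small factory like this sketch (hypothetical, not part of the original
# project) would keep them in sync. Hidden-layer sizes default to the (24, 36)
# used above.
def make_policy(policy_type, obs_size, lr_actor, h1=24, h2=36):
    if policy_type == "bernoulli":
        return BernoulliPolicy(obs_size, h1, h2, 1, lr_actor)
    elif policy_type == "normal":
        return NormalPolicy(obs_size, h1, h2, 1, lr_actor)
    elif policy_type == "squashedGaussian":
        return SquashedGaussianPolicy(obs_size, h1, h2, 1, lr_actor)
    raise ValueError("unsupported policy type: " + policy_type)
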
def study_pg(params) -> None:
    """
    Start a study of the policy gradient algorithms
    :param params: the parameters of the study
    :return: nothing
    """
    # 'discrete' was added to the supported policy types for this study
    assert params.policy_type in [
        'bernoulli', 'normal', 'squashedGaussian', 'discrete'
    ], 'unsupported policy type'
    chrono = Chrono()
    # cuda = torch.device('cuda')
    study = params.gradients
    simu = make_simu_from_params(params)
    for i in range(len(study)):
        simu.env.set_file_name(study[i] + '_' + simu.env_name)
        policy_loss_file, critic_loss_file = set_files(study[i], simu.env_name)
        print("study : ", study[i])
        for j in range(params.nb_repet):
            simu.env.reinit()
            if params.policy_type == "bernoulli":
                policy = BernoulliPolicy(simu.obs_size, 100, 200, 1,
                                         params.lr_actor)
            # the discrete policy requires a discrete gym action space
            elif params.policy_type == "discrete":
                if isinstance(simu.env.action_space, gym.spaces.Box):
                    raise ValueError(
                        "environment action space is not discrete: " +
                        str(simu.env.action_space))
                nb_actions = simu.env.action_space.n
                policy = DiscretePolicy(simu.obs_size, 24, 36, nb_actions,
                                        params.lr_actor)
            elif params.policy_type == "normal":
                policy = NormalPolicy(simu.obs_size, 100, 200, 1,
                                      params.lr_actor)
            elif params.policy_type == "squashedGaussian":
                policy = SquashedGaussianPolicy(simu.obs_size, 100, 200, 1,
                                                params.lr_actor)
            elif params.policy_type == "DDPG":
                # unreachable unless 'DDPG' is added to the assert above
                policy = DDPG(simu.obs_size, 24, 36, 1, params.lr_actor)
            # policy = policy.cuda()
            pw = PolicyWrapper(policy, params.policy_type, simu.env_name,
                               params.team_name, params.max_episode_steps)
            plot_policy(policy,
                        simu.env,
                        True,
                        simu.env_name,
                        study[i],
                        '_ante_',
                        j,
                        plot=False)

            if not simu.discrete:
                act_size = simu.env.action_space.shape[0]
                critic = QNetworkContinuous(simu.obs_size + act_size, 24, 36,
                                            1, params.lr_critic)
            else:
                critic = VNetwork(simu.obs_size, 24, 36, 1, params.lr_critic)
            # plot_critic(simu, critic, policy, study[i], '_ante_', j)

            simu.train(pw, params, policy, critic, policy_loss_file,
                       critic_loss_file, study[i])
            plot_policy(policy,
                        simu.env,
                        True,
                        simu.env_name,
                        study[i],
                        '_post_',
                        j,
                        plot=False)
            # histogram plotting is disabled; uncomment to inspect the policy
            # after training
            # if params.policy_type == "normal":
            #     plot_normal_histograms(policy, j, simu.env_name)
            # else:
            #     plot_weight_histograms(policy, j, simu.env_name)
            plot_critic(simu, critic, policy, study[i], '_post_', j)
            critic.save_model('data/critics/' + params.env_name + '#' +
                              params.team_name + '#' + study[i] + str(j) +
                              '.pt')
    chrono.stop()
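

# A standalone illustration of the action-space check used by the 'discrete'
# branch of study_pg above (assumes the classic gym API; 'CartPole-v1' is just
# an example environment id).
def example_action_space_check(env_id='CartPole-v1') -> None:
    env = gym.make(env_id)
    if isinstance(env.action_space, gym.spaces.Box):
        print("continuous actions, shape:", env.action_space.shape)
    else:
        print("discrete actions, n =", env.action_space.n)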