def load_policies(self, folder) -> None:
    """
    Load and evaluate all the policies found in the given folder
    :param folder: name of the folder containing policies
    :return: nothing (the environments are stored in self.env_dict,
             the evaluation scores in self.score_dict)
    """
    listdir = os.listdir(folder)
    for policy_file in listdir:
        print(policy_file)
        pw = PolicyWrapper(GenericNet(), "", "", "", 0)
        policy = pw.load(folder + policy_file)
        env = make_env(pw.env_name, pw.policy_type, pw.max_steps)
        env.set_reward_flag(False)
        env.set_duration_flag(False)
        scores = evaluate_pol(env, policy, False)
        if pw.env_name in self.env_dict:
            self.score_dict[pw.env_name][scores.mean()] = [pw.team_name, scores.std()]
        else:
            self.env_dict[pw.env_name] = env
            self.score_dict[pw.env_name] = {scores.mean(): [pw.team_name, scores.std()]}

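# A sketch of the resulting structure after loading two policies for the same
# environment (names and values are hypothetical):
#
#   self.env_dict   = {"CartPole-v0": <env>}
#   self.score_dict = {"CartPole-v0": {195.3: ["team_a", 12.1],
#                                      201.7: ["team_b", 8.4]}}
#
# Since the mean score serves as the dictionary key, two policies with exactly
# the same mean score would overwrite each other.
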
def load_policies(folder): """ Sort the policies and add colors to each method :param: folder : name of the folder containing policies Output : array of policies sorted and array of colors """ listdir = os.listdir(folder) policies = [] listdir.sort(key=lambda x: x.split('#')[3]) colors = [] print("\nPolices loaded :") for policy_file in listdir: if policy_file.split('#')[1] == 'PG': colors.append("#ff7f0e") if policy_file.split('#')[1] == 'CEM': colors.append("#d62728") pw = PolicyWrapper(GenericNet(), 0, "", "", "", 0) policy, _ = pw.load(directory + policy_file) policy = policy.get_weights() policies.append(policy) print("\n") env = (policy_file.split('#')[0]).split('/')[-1] policy = policy_file.split('#')[5] max_episode_steps = policy_file.split('#')[6] return policies, colors[1:], policy, env, max_episode_steps
def load_policies(folder): """ :param: folder : name of the folder containing policies Output : none (policies of the folder stored in self.env_dict) """ listdir = os.listdir(folder) policies = [] for policy_file in listdir: pw = PolicyWrapper(GenericNet(), 0, "", "", "", 0) policy,_ = pw.load(directory+policy_file) policy = policy.get_weights() policies.append(policy) return policies
def study_cem(params, starting_pol=None) -> None:
    """
    Start a study of CEM algorithms
    :param params: the parameters of the study
    :param starting_pol: initial policy
    :return: nothing
    """
    assert params.policy_type in ['squashedGaussian', 'normal', 'beta'], 'unsupported policy type'
    chrono = Chrono()
    # cuda = torch.device('cuda')
    study = params.gradients
    if params.nb_trajs_cem is not None:
        params.nb_trajs = params.nb_trajs_cem
    simu = make_simu_from_params(params)
    for i in range(1):  # not range(len(study)): only the sum gradient is used here
        simu.env.set_file_name('cem' + study[i] + '_' + simu.env_name)
        print("study : ", study[i])
        for j in range(params.nb_repet):
            simu.env.reinit()
            if params.policy_type == "squashedGaussian":
                policy = SquashedGaussianPolicy(simu.obs_size, 32, 64, 1)
            elif params.policy_type == "normal":
                policy = NormalPolicy(simu.obs_size, 32, 64, 1)
            elif params.policy_type == "beta":
                policy = BetaPolicy(simu.obs_size, 32, 64, 1)
            if starting_pol is not None:
                policy.set_weights(starting_pol[j])
            pw = PolicyWrapper(policy, j, params.policy_type, simu.env_name,
                               params.team_name, params.max_episode_steps)
            # plot_policy(policy, simu.env, True, simu.env_name, study[i], '_ante_', j, plot=False)
            simu.train_cem(pw, params, policy)
            # plot_policy(policy, simu.env, True, simu.env_name, study[i], '_post_', j, plot=False)
    chrono.stop()

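# Usage sketch (assuming params comes from the repo's argument parser and
# carries the fields read above):
#
#   params = get_args()
#   params.policy_type = 'normal'
#   study_cem(params)                        # train fresh policies
#   study_cem(params, starting_pol=weights)  # warm-start repetition j from weights[j]
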
def study_cem(params):
    """
    Start a study of CEM with the 'sum' gradient
    :param params: the parameters of the study
    :return: the weights, rewards, populations, population scores and is_kept
             flags collected during training
    """
    assert params.policy_type in ['normal'], 'unsupported policy type'
    # cuda = torch.device('cuda')
    study = params.gradients
    simu = make_simu_from_params(params)
    simu.env.set_file_name(study[0] + '_' + simu.env_name)
    reward_file = None
    print("study : ", study)
    # unfix the layers
    params.fix_layers = False
    print("cem study")
    # CEM study
    chrono_cem = Chrono()
    for j in range(params.nb_repet):
        simu.env.reinit()
        if params.policy_type == "normal":
            policy = NormalPolicy(simu.obs_size, 24, 36, 1)
        pw = PolicyWrapper(policy, params.policy_type, simu.env_name, j,
                           params.team_name, params.max_episode_steps)
        all_weights, all_rewards, all_pops, all_pops_scores, is_kept = simu.train(
            pw, params, policy, False, reward_file, "", study[0], 0, True)
    cem_time = chrono_cem.stop()
    return all_weights, all_rewards, all_pops, all_pops_scores, is_kept

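# Note: the loop overwrites its result variables at each repetition, so only
# the data of the last repetition is returned. A consumer sketch, assuming
# train() returns per-generation lists:
#
#   weights, rewards, pops, pop_scores, kept = study_cem(params)
#   print("generations:", len(rewards))
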
def plot_critic_from_name(folder, file_name, policy) -> None:
    """
    Plot a critic from a file present in the given directory
    A policy is given to plot the Q(s, a) critic, using this policy to choose a
    :param folder: the given directory
    :param file_name: the name of the file
    :param policy: the given policy
    :return: nothing
    """
    complete_name = folder + file_name
    pw = PolicyWrapper(GenericNet(), "", "")
    critic = pw.load(complete_name)
    env_name = pw.env_name
    env, discrete = make_env(env_name, ["x", "y"])
    obs_size = env.observation_space.shape[0]
    picture_name = file_name + '_portrait.pdf'
    if not discrete:
        if obs_size == 1:
            plot_qfunction_1D(critic, env, plot=False, save_figure=True,
                              figname=picture_name, foldername='/critics/')
        else:
            plot_qfunction_ND(critic, policy, env, plot=False, save_figure=True,
                              figname=picture_name, foldername='/critics/')
    else:
        if obs_size == 2:
            plot_vfunction_2D(critic, env, plot=False, save_figure=True,
                              figname=picture_name, foldername='/critics/')
        else:
            plot_vfunction_ND(critic, env, plot=False, save_figure=True,
                              figname=picture_name, foldername='/critics/')

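# Usage sketch (names are hypothetical). The file name follows the format
# written by the study functions below, '<env_name>#<team_name>#<study><repetition>.pt',
# and the policy argument is only used for the Q(s, a) portrait:
#
#   policy = NormalPolicy(obs_size, 24, 36, 1)
#   plot_critic_from_name('data/critics/', 'CartPole-v0#team_a#sum0.pt', policy)
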
def study_regress(params) -> None:
    """
    Start a study where the policy is first tuned by regression (see regress())
    and then trained with a policy gradient method
    :param params: the parameters of the study
    :return: nothing
    """
    assert params.policy_type in ['bernoulli', 'normal', 'squashedGaussian'], 'unsupported policy type'
    chrono = Chrono()
    study = params.gradients
    simu = make_simu_from_params(params)
    for i in range(len(study)):
        simu.env.set_file_name(study[i] + '_' + simu.env_name)
        policy_loss_file, critic_loss_file = set_files(study[i], simu.env_name)
        print("study : ", study[i])
        for j in range(params.nb_repet):
            simu.env.reinit()
            if params.policy_type == "bernoulli":
                policy = BernoulliPolicy(simu.obs_size, 24, 36, 1, params.lr_actor)
            elif params.policy_type == "normal":
                policy = NormalPolicy(simu.obs_size, 24, 36, 1, params.lr_actor)
            elif params.policy_type == "squashedGaussian":
                policy = SquashedGaussianPolicy(simu.obs_size, 24, 36, 1, params.lr_actor)
            pw = PolicyWrapper(policy, params.policy_type, simu.env_name,
                               params.team_name, params.max_episode_steps)
            plot_policy(policy, simu.env, True, simu.env_name, study[i], '_ante_', j, plot=False)
            if not simu.discrete:
                act_size = simu.env.action_space.shape[0]
                critic = QNetworkContinuous(simu.obs_size + act_size, 24, 36, 1, params.lr_critic)
            else:
                critic = VNetwork(simu.obs_size, 24, 36, 1, params.lr_critic)
            # plot_critic(simu, critic, policy, study[i], '_ante_', j)
            regress(simu, policy, params.policy_type, 250, params.render)
            simu.train(pw, params, policy, critic, policy_loss_file, critic_loss_file, study[i])
            plot_policy(policy, simu.env, True, simu.env_name, study[i], '_post_', j, plot=False)
            plot_critic(simu, critic, policy, study[i], '_post_', j)
            critic.save_model('data/critics/' + params.env_name + '#' + params.team_name
                              + '#' + study[i] + str(j) + '.pt')
    chrono.stop()

def study_beta(params):
    """
    Start a study of the impact of the beta parameter
    :param params: the parameters of the study
    :return: nothing
    """
    simu = make_simu_from_params(params)
    for beta in [0.1, 0.5, 1.0, 5.0, 10.0]:
        print("beta:", beta)
        policy_loss_file, critic_loss_file = set_files(str(beta), simu.env_name)
        simu.env.set_file_name(str(beta) + '_' + simu.env_name)
        for i in range(params.nb_repet):
            simu.env.reinit()
            if params.policy_type == "bernoulli":
                policy = BernoulliPolicy(simu.obs_size, 24, 36, 1, params.lr_actor)
            elif params.policy_type == "normal":
                policy = NormalPolicy(simu.obs_size, 24, 36, 1, params.lr_actor)
            if not simu.discrete:
                act_size = simu.env.action_space.shape[0]
                critic = QNetworkContinuous(simu.obs_size + act_size, 24, 36, 1, params.lr_critic)
            else:
                critic = VNetwork(simu.obs_size, 24, 36, 1, params.lr_critic)
            pw = PolicyWrapper(policy, params.policy_type, simu.env_name,
                               params.team_name, params.max_episode_steps)
            simu.train(pw, params, policy, critic, policy_loss_file, critic_loss_file, "beta", beta)

def study_pg(params) -> None:
    """
    Start a study of the policy gradient algorithms
    :param params: the parameters of the study
    :return: nothing
    """
    #### MODIF: added the 'discrete' policy type
    assert params.policy_type in ['bernoulli', 'normal', 'squashedGaussian', 'discrete'], \
        'unsupported policy type'
    ####
    chrono = Chrono()
    # cuda = torch.device('cuda')
    study = params.gradients
    simu = make_simu_from_params(params)
    for i in range(len(study)):
        simu.env.set_file_name(study[i] + '_' + simu.env_name)
        policy_loss_file, critic_loss_file = set_files(study[i], simu.env_name)
        print("study : ", study[i])
        for j in range(params.nb_repet):
            simu.env.reinit()
            if params.policy_type == "bernoulli":
                policy = BernoulliPolicy(simu.obs_size, 100, 200, 1, params.lr_actor)
            #### MODIF: added the discrete policy
            elif params.policy_type == "discrete":
                if isinstance(simu.env.action_space, gym.spaces.box.Box):
                    nb_actions = int(simu.env.action_space.high[0]
                                     - simu.env.action_space.low[0] + 1)
                    print("Error : environment action space is not discrete : "
                          + str(simu.env.action_space))
                else:
                    nb_actions = simu.env.action_space.n
                policy = DiscretePolicy(simu.obs_size, 24, 36, nb_actions, params.lr_actor)
            ####
            elif params.policy_type == "normal":
                policy = NormalPolicy(simu.obs_size, 100, 200, 1, params.lr_actor)
            elif params.policy_type == "squashedGaussian":
                policy = SquashedGaussianPolicy(simu.obs_size, 100, 200, 1, params.lr_actor)
            elif params.policy_type == "DDPG":
                policy = DDPG(simu.obs_size, 24, 36, 1, params.lr_actor)
            # policy = policy.cuda()
            pw = PolicyWrapper(policy, params.policy_type, simu.env_name,
                               params.team_name, params.max_episode_steps)
            plot_policy(policy, simu.env, True, simu.env_name, study[i], '_ante_', j, plot=False)
            if not simu.discrete:
                act_size = simu.env.action_space.shape[0]
                critic = QNetworkContinuous(simu.obs_size + act_size, 24, 36, 1, params.lr_critic)
            else:
                critic = VNetwork(simu.obs_size, 24, 36, 1, params.lr_critic)
            # plot_critic(simu, critic, policy, study[i], '_ante_', j)
            simu.train(pw, params, policy, critic, policy_loss_file, critic_loss_file, study[i])
            plot_policy(policy, simu.env, True, simu.env_name, study[i], '_post_', j, plot=False)
            if False:  # set to True to plot histograms of the policy weights
                if params.policy_type == "normal":
                    plot_normal_histograms(policy, j, simu.env_name)
                else:
                    plot_weight_histograms(policy, j, simu.env_name)
            plot_critic(simu, critic, policy, study[i], '_post_', j)
            critic.save_model('data/critics/' + params.env_name + '#' + params.team_name
                              + '#' + study[i] + str(j) + '.pt')
    chrono.stop()

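# Usage sketch (the gradient names are hypothetical; params.gradients selects
# which policy gradient variants to compare):
#
#   params = get_args()
#   params.gradients = ['sum', 'discount', 'normalize']
#   params.policy_type = 'normal'
#   study_pg(params)
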
""" policy = SquashedGaussianPolicy(env.observation_space.shape[0], 24, 36, 1, params.lr_actor) policy.set_weights(weights) state = env.reset() env.render(mode='rgb_array') for i in range(1000): action = policy.select_action(state, deterministic=True) print(action) next_state, reward, done, _ = env.step(action) env.render(mode='rgb_array') state = next_state print('finished rendering') # print("team: ", policy.team_name, "mean: ", scores.mean(), "std:", scores.std()) if __name__ == '__main__': args = get_args() print(args) pw = PolicyWrapper(GenericNet(), 0, "", "", "", 0) env = make_env(args.env_name, args.policy_type, args.max_episode_steps) env = gym.wrappers.Monitor(env, './videos/PG_fin') directory = os.getcwd() + '/Models/' weights_vecs = load_policies(directory) for weights_vec in weights_vecs: render_pol(args, env, weights_vec) env.close()