class Q_Trainer(object):

    def __init__(self, params):
        self.params = params

        train_args = {
            'num_agent_train_steps_per_iter': params['num_agent_train_steps_per_iter'],
            'num_critic_updates_per_agent_update': params['num_critic_updates_per_agent_update'],
            'train_batch_size': params['batch_size'],
            'double_q': params['double_q'],
        }

        env_args = get_env_kwargs(params['env_name'], params['lunar_arch'])

        self.agent_params = {**train_args, **env_args, **params}

        self.params['agent_class'] = DQNAgent
        self.params['agent_params'] = self.agent_params
        self.params['train_batch_size'] = params['batch_size']
        self.params['env_wrappers'] = self.agent_params['env_wrappers']

        self.rl_trainer = RL_Trainer(self.params)

    def run_training_loop(self):
        self.rl_trainer.run_training_loop(
            self.agent_params['num_timesteps'],
            collect_policy=self.rl_trainer.agent.actor,
            eval_policy=self.rl_trainer.agent.actor,
        )
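# --- Illustrative sketch, not part of the source ---
# A minimal, self-contained demo of the dict-merge pattern used above in
# `{**train_args, **env_args, **params}`. With ** unpacking, later dicts take
# precedence on duplicate keys, so entries in `params` override same-named
# entries contributed by env_args or train_args. The values below are made up
# purely for illustration.
demo_train_args = {'train_batch_size': 32, 'double_q': False}
demo_env_args = {'num_timesteps': 50000, 'env_wrappers': None}
demo_params = {'double_q': True}  # caller-supplied flag overrides the earlier default

demo_merged = {**demo_train_args, **demo_env_args, **demo_params}
assert demo_merged['double_q'] is True          # rightmost dict wins on conflicts
assert demo_merged['num_timesteps'] == 50000    # non-overlapping keys are all kept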
class PG_Trainer(object):

    def __init__(self, params):

        #####################
        ## SET AGENT PARAMS
        #####################

        computation_graph_args = {
            'n_layers': params['n_layers'],
            'size': params['size'],
            'learning_rate': params['learning_rate'],
        }

        estimate_advantage_args = {
            'gamma': params['discount'],
            'standardize_advantages': not (params['dont_standardize_advantages']),
            'reward_to_go': params['reward_to_go'],
            'nn_baseline': params['nn_baseline'],
        }

        train_args = {
            'num_agent_train_steps_per_iter': params['num_agent_train_steps_per_iter'],
        }

        agent_params = {**computation_graph_args, **estimate_advantage_args, **train_args}

        self.params = params
        self.params['agent_class'] = PGAgent
        self.params['agent_params'] = agent_params
        self.params['batch_size_initial'] = self.params['batch_size']

        ################
        ## RL TRAINER
        ################

        self.rl_trainer = RL_Trainer(self.params)

    def run_training_loop(self):
        self.rl_trainer.run_training_loop(
            self.params['n_iter'],
            collect_policy=self.rl_trainer.agent.actor,
            eval_policy=self.rl_trainer.agent.actor,
        )
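# --- Hedged usage sketch, not part of the source ---
# One plausible way a launcher script might drive PG_Trainer. The values are
# illustrative only, and RL_Trainer will typically require additional keys
# (env_name, ep_len, seed, logdir, eval_batch_size, ...) that are omitted
# here; those, and the defaults shown, are assumptions rather than the
# author's actual configuration.
pg_params = {
    'n_layers': 2,
    'size': 64,
    'learning_rate': 5e-3,
    'discount': 0.99,
    'dont_standardize_advantages': False,
    'reward_to_go': True,
    'nn_baseline': False,
    'num_agent_train_steps_per_iter': 1,
    'batch_size': 1000,
    'n_iter': 100,
    # ... plus whatever RL_Trainer itself expects
}
pg_trainer = PG_Trainer(pg_params)
pg_trainer.run_training_loop()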
class BC_Trainer(object):

    def __init__(self, params):

        #######################
        ## AGENT PARAMS
        #######################

        agent_params = {
            'n_layers': params['n_layers'],
            'size': params['size'],
            'learning_rate': params['learning_rate'],
            'max_replay_buffer_size': params['max_replay_buffer_size'],
        }

        self.params = params
        self.params['agent_class'] = BCAgent
        self.params['agent_params'] = agent_params

        ################
        ## RL TRAINER
        ################

        self.rl_trainer = RL_Trainer(self.params)

        #######################
        ## LOAD EXPERT POLICY
        #######################

        print('Loading expert policy from...', self.params['expert_policy_file'])
        self.loaded_expert_policy = Loaded_Gaussian_Policy(
            self.rl_trainer.sess, self.params['expert_policy_file'])
        print('Done restoring expert policy...')

    def run_training_loop(self):
        self.rl_trainer.run_training_loop(
            n_iter=self.params['n_iter'],
            initial_expertdata=self.params['expert_data'],
            collect_policy=self.rl_trainer.agent.actor,
            eval_policy=self.rl_trainer.agent.actor,
            relabel_with_expert=self.params['do_dagger'],
            expert_policy=self.loaded_expert_policy,
        )
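# --- Hedged usage sketch, not part of the source ---
# How BC_Trainer might be invoked for plain behavioral cloning versus DAgger.
# The file paths and values below are placeholders, and RL_Trainer will need
# further keys (env_name, ep_len, logdir, ...) that are assumed rather than
# taken from the source.
bc_params = {
    'expert_policy_file': 'experts/ExampleExpert.pkl',   # placeholder path
    'expert_data': 'expert_data/expert_rollouts.pkl',    # placeholder path
    'n_layers': 2,
    'size': 64,
    'learning_rate': 5e-3,
    'max_replay_buffer_size': 1000000,
    'n_iter': 1,          # a single iteration suffices for pure BC; more only helps with DAgger
    'do_dagger': False,   # True => relabel_with_expert, i.e. DAgger-style relabeling
    # ... plus whatever RL_Trainer itself expects
}
bc_trainer = BC_Trainer(bc_params)
bc_trainer.run_training_loop()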
def __init__(self, params):

    #####################
    ## SET AGENT PARAMS
    #####################

    computation_graph_args = {
        'n_layers': params['n_layers'],
        'size': params['size'],
        'learning_rate': params['learning_rate'],
        'num_target_updates': params['num_target_updates'],
        'num_grad_steps_per_target_update': params['num_grad_steps_per_target_update'],
    }

    estimate_advantage_args = {
        'gamma': params['discount'],
        'standardize_advantages': not (params['dont_standardize_advantages']),
    }

    train_args = {
        'num_agent_train_steps_per_iter': params['num_agent_train_steps_per_iter'],
        'num_critic_updates_per_agent_update': params['num_critic_updates_per_agent_update'],
        'num_actor_updates_per_agent_update': params['num_actor_updates_per_agent_update'],
    }

    agent_params = {**computation_graph_args, **estimate_advantage_args, **train_args}

    self.params = params
    self.params['agent_class'] = ACAgent
    self.params['agent_params'] = agent_params
    self.params['batch_size_initial'] = self.params['batch_size']

    ################
    ## RL TRAINER
    ################

    self.rl_trainer = RL_Trainer(self.params)
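# --- Hedged sketch, not part of the source ---
# The keys read directly by the actor-critic constructor above, with
# illustrative values. The surrounding trainer class is not shown in the
# source, and any extra keys that RL_Trainer or ACAgent require beyond these
# are assumptions and are omitted here.
ac_params = {
    'n_layers': 2,
    'size': 64,
    'learning_rate': 5e-3,
    'num_target_updates': 10,
    'num_grad_steps_per_target_update': 10,
    'discount': 0.99,
    'dont_standardize_advantages': False,
    'num_agent_train_steps_per_iter': 1,
    'num_critic_updates_per_agent_update': 1,
    'num_actor_updates_per_agent_update': 1,
    'batch_size': 1000,
}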