Example #1

# Imports assumed from the CS285 (Berkeley Deep RL) homework repo layout; adjust to your tree.
from cs285.agents.ac_agent import ACAgent
from cs285.infrastructure.rl_trainer import RL_Trainer

class AC_Trainer(object):
    def __init__(self, params):

        #####################
        ## SET AGENT PARAMS
        #####################

        computation_graph_args = {
            'n_layers': params['n_layers'],
            'size': params['size'],
            'learning_rate': params['learning_rate'],
            'num_target_updates': params['num_target_updates'],
            'num_grad_steps_per_target_update': params['num_grad_steps_per_target_update'],
        }

        estimate_advantage_args = {
            'gamma': params['discount'],
            'standardize_advantages': not params['dont_standardize_advantages'],
        }

        train_args = {
            'num_agent_train_steps_per_iter': params['num_agent_train_steps_per_iter'],
            'num_critic_updates_per_agent_update': params['num_critic_updates_per_agent_update'],
            'num_actor_updates_per_agent_update': params['num_actor_updates_per_agent_update'],
        }

        agent_params = {
            **computation_graph_args,
            **estimate_advantage_args,
            **train_args
        }

        self.params = params
        self.params['agent_class'] = ACAgent
        self.params['agent_params'] = agent_params
        self.params['batch_size_initial'] = self.params['batch_size']

        ################
        ## RL TRAINER
        ################

        self.rl_trainer = RL_Trainer(self.params)

    def run_training_loop(self):

        self.rl_trainer.run_training_loop(
            self.params['n_iter'],
            collect_policy=self.rl_trainer.agent.actor,
            eval_policy=self.rl_trainer.agent.actor,
        )
        self.rl_trainer.eval_render()
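
For orientation, a minimal invocation might look like the sketch below. The keys are exactly the ones __init__ and run_training_loop read above, but every value is an illustrative assumption, and the real RL_Trainer will typically require further keys (environment name, logging directory, seed, and so on) that are omitted here.

# Hypothetical driver for AC_Trainer; all values are illustrative assumptions,
# not the course defaults.
params = {
    'n_iter': 100,
    'batch_size': 1000,
    'discount': 0.99,
    'n_layers': 2,
    'size': 64,
    'learning_rate': 5e-3,
    'num_target_updates': 10,
    'num_grad_steps_per_target_update': 10,
    'dont_standardize_advantages': False,
    'num_agent_train_steps_per_iter': 1,
    'num_critic_updates_per_agent_update': 1,
    'num_actor_updates_per_agent_update': 1,
}

trainer = AC_Trainer(params)
trainer.run_training_loop()
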
Example #2

# Imports assumed from the CS285 homework repo layout; adjust to your tree.
from cs285.agents.dqn_agent import DQNAgent
from cs285.agents.pg_agent import PGAgent
from cs285.agents.bc_agent import BCAgent
from cs285.infrastructure.rl_trainer import RL_Trainer
from cs285.infrastructure.dqn_utils import get_env_kwargs
from cs285.policies.loaded_gaussian_policy import Loaded_Gaussian_Policy

class Q_Trainer(object):
    def __init__(self, params):
        self.params = params

        train_args = {
            'num_agent_train_steps_per_iter': params['num_agent_train_steps_per_iter'],
            'num_critic_updates_per_agent_update': params['num_critic_updates_per_agent_update'],
            'train_batch_size': params['batch_size'],
            'double_q': params['double_q'],
        }

        env_args = get_env_kwargs(params['env_name'])

        self.agent_params = {**train_args, **env_args, **params}

        self.params['agent_class'] = DQNAgent
        self.params['agent_params'] = self.agent_params
        self.params['train_batch_size'] = params['batch_size']
        self.params['env_wrappers'] = self.agent_params['env_wrappers']

        self.rl_trainer = RL_Trainer(self.params)

    def run_training_loop(self):
        self.rl_trainer.run_training_loop(
            self.agent_params['num_timesteps'],
            collect_policy=self.rl_trainer.agent.actor,
            eval_policy=self.rl_trainer.agent.actor,
        )
        self.rl_trainer.eval_render()
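
One detail worth noting in Q_Trainer: in the {**train_args, **env_args, **params} merge, later dicts win on key collisions, so values in params override the per-environment defaults returned by get_env_kwargs. A tiny self-contained sketch of that precedence rule (the key names below are illustrative only):

# In {**a, **b}, the rightmost dict wins on duplicate keys.
env_defaults = {'learning_starts': 50000, 'frame_history_len': 4}
params = {'learning_starts': 1000}

merged = {**env_defaults, **params}
assert merged['learning_starts'] == 1000   # overridden by params
assert merged['frame_history_len'] == 4    # default preserved
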
class PG_Trainer(object):
    def __init__(self, params):

        #####################
        ## SET AGENT PARAMS
        #####################

        computation_graph_args = {
            'n_layers': params['n_layers'],
            'size': params['size'],
            'learning_rate': params['learning_rate'],
        }
        estimate_advantage_args = {
            'gamma': params['discount'],
            'standardize_advantages': params['standardize_advantages'],
            'reward_to_go': params['reward_to_go'],
            'nn_baseline': params['nn_baseline'],
            'gae': params['gae'],
            'gae_gamma': params['gae_gamma'],
            'gae_lambda': params['gae_lambda']
        }

        train_args = {
            'num_agent_train_steps_per_iter': params['num_agent_train_steps_per_iter'],
        }

        agent_params = {
            **computation_graph_args,
            **estimate_advantage_args,
            **train_args
        }

        self.params = params
        self.params['agent_class'] = PGAgent
        self.params['agent_params'] = agent_params
        self.params['batch_size_initial'] = self.params['batch_size']

        ################
        ## RL TRAINER
        ################

        self.rl_trainer = RL_Trainer(self.params)

    def run_training_loop(self):

        self.rl_trainer.run_training_loop(
            self.params['n_iter'],
            collect_policy=self.rl_trainer.agent.actor,
            eval_policy=self.rl_trainer.agent.actor,
        )
        if self.params['render_after_training'] == 1:
            self.rl_trainer.eval_render(self.rl_trainer.agent.actor)

    def load_trained_agent_render(self):
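        # NOTE: machine-specific checkpoint path from the original author's run;
        # point this at a checkpoint from your own training run.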
        self.rl_trainer.agent.actor.restore(
            '/home/kim/cs285_ws/homework_fall2019/hw2/cs285/data/pg_todo_CartPole-v0_15-01-2020_15-42-29/policy_itr_99'
        )
        self.rl_trainer.eval_render(self.rl_trainer.agent.actor)
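
Since that checkpoint path only exists on the original author's machine, a more portable variant would take the path as a parameter. A minimal sketch, where checkpoint_path is a hypothetical argument rather than part of the original class:

    # Sketch of a portable alternative to the hard-coded path above.
    def load_trained_agent_render(self, checkpoint_path):
        self.rl_trainer.agent.actor.restore(checkpoint_path)
        self.rl_trainer.eval_render(self.rl_trainer.agent.actor)
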
class BC_Trainer(object):

    def __init__(self, params):

        #######################
        ## AGENT PARAMS
        #######################

        agent_params = {
            'n_layers': params['n_layers'],
            'size': params['size'],
            'learning_rate': params['learning_rate'],
            'max_replay_buffer_size': params['max_replay_buffer_size'],
        }

        self.params = params
        self.params['agent_class'] = BCAgent ## TODO: look in here and implement this
        self.params['agent_params'] = agent_params

        ################
        ## RL TRAINER
        ################

        self.rl_trainer = RL_Trainer(self.params) ## TODO: look in here and implement this

        #######################
        ## LOAD EXPERT POLICY
        #######################

        print('Loading expert policy from...', self.params['expert_policy_file'])
        self.loaded_expert_policy = Loaded_Gaussian_Policy(self.rl_trainer.sess, self.params['expert_policy_file'])
        print('Done restoring expert policy...')

    def run_training_loop(self):

        self.rl_trainer.run_training_loop(
            n_iter=self.params['n_iter'],
            initial_expertdata=self.params['expert_data'],
            collect_policy=self.rl_trainer.agent.actor,
            eval_policy=self.rl_trainer.agent.actor,
            relabel_with_expert=self.params['do_dagger'],
            expert_policy=self.loaded_expert_policy,
        )
        if self.params['render_after_training'] == 1:
            self.rl_trainer.eval_render(self.rl_trainer.agent.actor)
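
Finally, a minimal hypothetical invocation of BC_Trainer. The key names mirror what __init__ and run_training_loop read above; the paths and values are illustrative assumptions, and as with the earlier examples, RL_Trainer will need additional keys (such as the environment name) that are omitted here. When do_dagger is true, the loop relabels collected observations with the loaded expert policy (DAgger) instead of training on the fixed expert_data alone.

# Hypothetical BC/DAgger driver; paths and values are illustrative assumptions.
params = {
    'expert_policy_file': 'cs285/policies/experts/Ant.pkl',
    'expert_data': 'cs285/expert_data/expert_data_Ant-v2.pkl',
    'n_iter': 1,                # more than 1 iteration only helps with DAgger
    'do_dagger': False,         # True => relabel collected data with the expert
    'n_layers': 2,
    'size': 64,
    'learning_rate': 5e-3,
    'max_replay_buffer_size': 1000000,
    'render_after_training': 0,
}

bc_trainer = BC_Trainer(params)
bc_trainer.run_training_loop()
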