def _init_noise(self):
    """
    Initializes exploration noise and populates self.noise, a callable
    that returns noise with the same dimension as the action.
    """
    if self.agent_mode == 'eval_deterministic':
        return
    if self.noise_type == 'normal':
        self.noise = NormalActionNoise(
            np.zeros(self.action_dim),
            np.ones(self.action_dim) * self.sigma)
    elif self.noise_type == 'ou_noise':
        self.noise = OrnsteinUhlenbeckActionNoise(
            mu=np.zeros(self.action_dim),
            sigma=self.sigma,
            theta=self.learner_config.algo.exploration.theta,
            dt=self.learner_config.algo.exploration.dt)
    else:
        raise ConfigError('Noise type {} undefined.'.format(self.noise_type))
    if self.param_noise_type == 'normal':
        self.param_noise = NormalParameterNoise(self.param_noise_sigma)
    elif self.param_noise_type == 'adaptive_normal':
        model_copy = copy.deepcopy(self.model)
        module_dict_copy = ModuleDict(self.module_dict(model_copy))
        self.param_noise = AdaptiveNormalParameterNoise(
            model_copy,
            module_dict_copy,
            self.param_noise_target_stddev,
            alpha=self.param_noise_alpha,
            sigma=self.param_noise_sigma)
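
# Illustrative helper (an assumption, not part of the Surreal API): how a
# noise callable built by _init_noise() is typically consumed inside act().
# Per the docstring above, the callable returns an array shaped like the
# action; the [-1, 1] clipping range is an assumed action bound.
def _example_apply_exploration_noise(action, noise_fn):
    """Adds exploration noise to an action, clipped to an assumed [-1, 1] range."""
    return np.clip(action + noise_fn(), -1.0, 1.0)

# e.g. noise_fn = NormalActionNoise(np.zeros(action_dim), np.ones(action_dim) * 0.1)
#      safe_action = _example_apply_exploration_noise(raw_action, noise_fn)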
class Agent(object, metaclass=U.AutoInitializeMeta):
    """
    Important: when extending this class, make sure to follow the __init__
    method signature so that orchestrating functions can properly
    initialize custom agents.

    TODO: Extend the initialization to allow custom, non-config per-agent
    settings, to be used for a heterogeneous agent population.
    """

    def __init__(self,
                 learner_config,
                 env_config,
                 session_config,
                 agent_id,
                 agent_mode,
                 render=False):
        """
        Initializes the agent.
        """
        self.learner_config = learner_config
        self.env_config = env_config
        self.session_config = session_config
        assert agent_mode in AGENT_MODES
        self.agent_mode = agent_mode
        self.agent_id = agent_id
        if self.agent_mode not in [
                'eval_deterministic_local', 'eval_stochastic_local'
        ]:
            self._setup_parameter_pull()
            self._setup_logging()
        self.current_episode = 0
        self.cumulative_steps = 0
        self.current_step = 0
        self.actions_since_param_update = 0
        self.episodes_since_param_update = 0
        self.render = render

    #######
    # Internal initialization methods
    #######

    def _initialize(self):
        """
        Implements the AutoInitializeMeta metaclass hook:
        self.module_dict() can only be called after the module is
        constructed by subclasses.
        """
        if self.agent_mode not in [
                'eval_deterministic_local', 'eval_stochastic_local'
        ]:
            host = os.environ['SYMPH_PS_FRONTEND_HOST']
            port = os.environ['SYMPH_PS_FRONTEND_PORT']
            self._module_dict = self.module_dict()
            if not isinstance(self._module_dict, ModuleDict):
                self._module_dict = ModuleDict(self._module_dict)
            self._ps_client = ParameterClient(
                host=host,
                port=port,
            )

    def _setup_parameter_pull(self):
        """
        Configures periodic parameter fetching from the parameter server.
        """
        self._fetch_parameter_mode = self.session_config.agent.fetch_parameter_mode
        self._fetch_parameter_interval = self.session_config.agent.fetch_parameter_interval
        self._fetch_parameter_tracker = PeriodicTracker(
            self._fetch_parameter_interval)

    def _setup_logging(self):
        """
        Creates the tensorplex logger and loggerplex logger, and
        initializes bookkeeping values.
        """
        if self.agent_mode == 'training':
            logger_name = 'agent-{}'.format(self.agent_id)
            self.tensorplex = self._get_tensorplex('{}/{}'.format(
                'agent', self.agent_id))
        else:
            logger_name = 'eval-{}'.format(self.agent_id)
            self.tensorplex = self._get_tensorplex('{}/{}'.format(
                'eval', self.agent_id))
        self.log = get_loggerplex_client(logger_name, self.session_config)
        # Record how long the current parameters have been in use
        self.actions_since_param_update = 0
        self.episodes_since_param_update = 0
        # Weighted average over ~100 parameter updates
        self.actions_per_param_update = U.MovingAverageRecorder(decay=0.99)
        self.episodes_per_param_update = U.MovingAverageRecorder(decay=0.99)

    def _get_tensorplex(self, name):
        """
        Gets the periodic tensorplex object.

        Args:
            name: the name of the collection of metrics
        """
        tp = get_tensorplex_client(name, self.session_config)
        periodic_tp = PeriodicTensorplex(
            tensorplex=tp,
            period=self.session_config.tensorplex.update_schedule.agent,
            is_average=True,
            keep_full_history=False)
        return periodic_tp

    #######
    # Exposed abstract methods
    # Override in subclass; no need to call super().act() etc.
    # Enough for basic usage
    #######

    def act(self, obs):
        """
        Abstract method for taking actions.
        Check `self.agent_mode` inside the function and adapt the act()
        logic for training vs. evaluation. See the example subclass at the
        end of this module for a minimal implementation sketch.

        Args:
            obs: typically a single observation; make sure to vectorize it
                before passing it to the torch `model`.
        Returns:
            action to be executed in the env
        """
        raise NotImplementedError

    def module_dict(self):
        """
        Returns:
            a dict of name -> surreal.utils.pytorch.Module
        """
        raise NotImplementedError

    #######
    # Advanced exposed methods
    # Override in subclass; NEED to call super().on_parameter_fetched() etc.
    # User needs to take care of agent mode
    # For advanced usage
    #######

    def on_parameter_fetched(self, params, info):
        """
        Called when a new parameter is fetched.
        """
        if self.agent_mode == 'training':
            # The time it takes for parameters to travel from learner to agent
            delay = time.time() - info['time']
            self.actions_per_param_update.add_value(
                self.actions_since_param_update)
            self.episodes_per_param_update.add_value(
                self.episodes_since_param_update)
            self.tensorplex.add_scalars({
                '.core/parameter_publish_delay_s': delay,
                '.core/actions_per_param_update':
                    self.actions_per_param_update.cur_value(),
                '.core/episodes_per_param_update':
                    self.episodes_per_param_update.cur_value()
            })
            self.actions_since_param_update = 0
            self.episodes_since_param_update = 0
        return params

    def pre_action(self, obs):
        """
        Called before act() by the agent main script.
        """
        if self.agent_mode == 'training':
            if self._fetch_parameter_mode == 'step' and \
                    self._fetch_parameter_tracker.track_increment():
                self.fetch_parameter()

    def post_action(self, obs, action, obs_next, reward, done, info):
        """
        Called after act() by the agent main script.
        """
        self.current_step += 1
        self.cumulative_steps += 1
        if self.agent_mode == 'training':
            self.actions_since_param_update += 1
            if done:
                self.episodes_since_param_update += 1

    def pre_episode(self):
        """
        Called by the agent process.
        Can be used to reset internal states before an episode starts.
        """
        if self.agent_mode == 'training':
            if self._fetch_parameter_mode == 'episode' and \
                    self._fetch_parameter_tracker.track_increment():
                self.fetch_parameter()

    def post_episode(self):
        """
        Called by the agent process.
        Can be used to reset internal states after an episode ends,
        i.e. after the post_action() call where done is True.
        """
        self.current_episode += 1

    #######
    # Main loops.
    # Customize this to fully customize the agent process
    #######

    def main(self):
        """
        Default main loop: one-time setup, then loop episodes forever.
        """
        self.main_setup()
        while True:
            self.main_loop()

    def main_setup(self):
        """
        One-time setup before the main loop starts.
        """
        env = self.get_env()
        env = self.prepare_env(env)
        self.env = env
        if self.agent_mode == 'training':
            self.fetch_parameter()

    def main_loop(self):
        """
        One iteration of the agent loop; runs one episode of the environment.
        """
        env = self.env
        self.pre_episode()
        obs, info = env.reset()
        total_reward = 0.0
        while True:
            if self.render:
                # TODO: figure out why it needs to be unwrapped
                env.unwrapped.render()
            self.pre_action(obs)
            action = self.act(obs)
            obs_next, reward, done, info = env.step(action)
            total_reward += reward
            self.post_action(obs, action, obs_next, reward, done, info)
            obs = obs_next
            if done:
                break
        self.post_episode()
        if self.agent_mode in [
                'eval_deterministic_local', 'eval_stochastic_local'
        ]:
            return
        if self.current_episode % 20 == 0:
            self.log.info('Episode {} reward {}'.format(
                self.current_episode, total_reward))

    def get_env(self):
        """
        Returns:
            a subclass of EnvBase, created from self.env_config
        """
        if self.agent_mode in ['eval_deterministic', 'eval_stochastic']:
            env, _ = make_env(self.env_config, mode='eval')
        else:
            env, _ = make_env(self.env_config)
        return env

    def prepare_env(self, env):
        """
        Applies custom wrappers to the environment as necessary.

        Args:
            env: subclass of EnvBase

        Returns:
            env: the (possibly wrapped) environment
        """
        if self.agent_mode == 'training':
            return self.prepare_env_agent(env)
        else:
            return self.prepare_env_eval(env)

    def prepare_env_agent(self, env):
        """
        Applies custom wrappers to the environment as necessary.
        Only changes training (agent) behavior.
        """
        # This has to go first, as it alters the step() return value
        limit_episode_length = self.env_config.limit_episode_length
        if limit_episode_length > 0:
            env = MaxStepWrapper(env, limit_episode_length)
        env = TrainingTensorplexMonitor(
            env,
            agent_id=self.agent_id,
            session_config=self.session_config,
            separate_plots=True)
        return env

    def prepare_env_eval(self, env):
        """
        Applies custom wrappers to the environment as necessary.
        Only changes eval behavior.
        """
        limit_episode_length = self.env_config.limit_episode_length
        if limit_episode_length > 0:
            env = MaxStepWrapper(env, limit_episode_length)
        if self.agent_mode not in [
                'eval_deterministic_local', 'eval_stochastic_local'
        ]:
            env = EvalTensorplexMonitor(
                env,
                eval_id=self.agent_id,
                fetch_parameter=self.fetch_parameter,
                session_config=self.session_config,
            )
        env_category = self.env_config.env_name.split(':')[0]
        if self.env_config.video.record_video and self.agent_id == 0:
            # gym video recording is not supported due to a bug in OpenAI gym:
            # https://github.com/openai/gym/issues/1050
            env = VideoWrapper(env, self.env_config, self.session_config)
        return env

    def main_agent(self):
        """
        Main loop run by the agent script.
        Override to fully customize agent behavior.
        """
        self.main()

    def main_eval(self):
        """
        Main loop run by the eval script.
        Override to fully customize eval behavior.
        """
        self.main()

    #######
    # Exposed public methods
    #######

    def fetch_parameter(self):
        """
        Fetches the latest parameters from the parameter server,
        runs the on_parameter_fetched() hook, and loads them into
        the module dict.
        """
        params, info = self._ps_client.fetch_parameter_with_info()
        if params:
            params = U.deserialize(params)
            params = self.on_parameter_fetched(params, info)
            self._module_dict.load(params)

    def fetch_parameter_info(self):
        """
        Fetches information about the parameters currently held by
        the parameter server.
        """
        return self._ps_client.fetch_info()
    def set_agent_mode(self, agent_mode):
        """
        Args:
            agent_mode: 'training', 'eval_deterministic', or 'eval_stochastic'
        """
        assert agent_mode in AGENT_MODES
        self.agent_mode = agent_mode
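
#######
# Example subclass (illustrative sketch only, not part of the Surreal API).
# It shows the minimal contract referenced in act() above: implement act()
# and module_dict(). The linear torch policy and its dimensions are
# assumptions for the sketch. AutoInitializeMeta is expected to invoke
# _initialize() once construction finishes, which is why module_dict()
# may safely reference self.model here.
#######

import torch
import torch.nn as nn


class ExampleLinearAgent(Agent):
    def __init__(self, learner_config, env_config, session_config,
                 agent_id, agent_mode, render=False):
        super().__init__(learner_config, env_config, session_config,
                         agent_id, agent_mode, render=render)
        # Hypothetical dimensions; a real agent would read these from config
        self.model = nn.Linear(17, 6)

    def act(self, obs):
        # Vectorize the single observation before the forward pass
        obs_tensor = torch.as_tensor(obs, dtype=torch.float32).unsqueeze(0)
        with torch.no_grad():
            action = self.model(obs_tensor).squeeze(0).numpy()
        return action

    def module_dict(self):
        # A plain dict is fine; _initialize() wraps it in a ModuleDict
        return {'model': self.model}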