def __init__(
    self,
    obs_spec: Spec,
    act_spec: Spec,
    model_fn: ModelBuilder = None,
    policy_cls: PolicyType = None,
    sess_mgr: SessionManager = None,
    optimizer: tf.train.Optimizer = None,
    n_envs=1,
    value_coef=DEFAULTS['value_coef'],
    entropy_coef=DEFAULTS['entropy_coef'],
    traj_len=DEFAULTS['traj_len'],
    batch_sz=DEFAULTS['batch_sz'],
    discount=DEFAULTS['discount'],
    gae_lambda=DEFAULTS['gae_lambda'],
    clip_rewards=DEFAULTS['clip_rewards'],
    clip_grads_norm=DEFAULTS['clip_grads_norm'],
    normalize_returns=DEFAULTS['normalize_returns'],
    normalize_advantages=DEFAULTS['normalize_advantages'],
):
    """Initialise both base classes and attach a stream logger.

    Every argument whose name is a key of ``DEFAULTS`` and whose value
    differs from the corresponding ``DEFAULTS`` entry is forwarded to
    ``ActorCriticAgent.__init__`` as a keyword override; arguments left
    at their default are not forwarded.

    Args:
        obs_spec: Passed positionally to ``ActorCriticAgent.__init__``.
        act_spec: Passed positionally to ``ActorCriticAgent.__init__``.
        model_fn, policy_cls, optimizer: Forwarded only through the
            ``DEFAULTS`` filter described above (presumably ``DEFAULTS``
            has matching keys -- confirm against its definition).
        sess_mgr: Passed to ``ActorCriticAgent.__init__`` explicitly.
        n_envs: Passed to ``SyncRunningAgent.__init__`` and to the
            ``StreamLogger``.
        value_coef, entropy_coef, traj_len, batch_sz, discount,
        gae_lambda, clip_rewards, clip_grads_norm, normalize_returns,
        normalize_advantages: Settings whose defaults are read from
            ``DEFAULTS``.
    """
    # Must remain the first statement: the snapshot has to contain the
    # call arguments only.  It is frozen into a tuple so that the locals
    # bound below cannot leak into it.
    call_args = tuple(locals().items())

    kwargs = {}
    for name, value in call_args:
        if name not in DEFAULTS:
            continue
        if DEFAULTS[name] != value:
            kwargs[name] = value

    SyncRunningAgent.__init__(self, n_envs)
    ActorCriticAgent.__init__(
        self,
        obs_spec,
        act_spec,
        sess_mgr=sess_mgr,
        **kwargs
    )
    # The logger uses self.sess_mgr (not the raw argument), so it is
    # created only after the base initialisers have run.
    self.logger = StreamLogger(
        n_envs=n_envs,
        log_freq=10,
        sess_mgr=self.sess_mgr,
    )
def __init__(
    self,
    obs_spec: Spec,
    act_spec: Spec,
    model_fn: ModelBuilder = None,
    policy_cls: PolicyType = None,
    sess_mgr: SessionManager = None,
    optimizer: tf.train.Optimizer = None,
    n_envs=4,
    n_epochs=3,
    minibatch_sz=128,
    clip_ratio=0.2,
    clip_value=0.5,
    value_coef=DEFAULTS['value_coef'],
    entropy_coef=DEFAULTS['entropy_coef'],
    traj_len=DEFAULTS['traj_len'],
    batch_sz=DEFAULTS['batch_sz'],
    discount=DEFAULTS['discount'],
    gae_lambda=DEFAULTS['gae_lambda'],
    clip_rewards=DEFAULTS['clip_rewards'],
    clip_grads_norm=DEFAULTS['clip_grads_norm'],
    normalize_returns=DEFAULTS['normalize_returns'],
    normalize_advantages=DEFAULTS['normalize_advantages'],
    **kwargs,
):
    """Initialise both base classes, store the epoch/clipping settings
    and attach a stream logger.

    Every local whose name is a key of ``DEFAULTS`` and whose value
    differs from the corresponding ``DEFAULTS`` entry is forwarded to
    ``ActorCriticAgent.__init__`` as a keyword override, together with
    ``subenvs`` (always forwarded).

    Args:
        obs_spec: Passed positionally to ``ActorCriticAgent.__init__``.
        act_spec: Passed positionally to ``ActorCriticAgent.__init__``.
        model_fn, policy_cls, optimizer: Forwarded only through the
            ``DEFAULTS`` filter described above (presumably ``DEFAULTS``
            has matching keys -- confirm against its definition).
        sess_mgr: Passed to ``ActorCriticAgent.__init__`` explicitly.
        n_envs: Passed to ``SyncRunningAgent.__init__`` and to the
            ``StreamLogger``.
        n_epochs: Stored on ``self``; ``self.start_step`` is
            floor-divided by it at the end of initialisation.
        minibatch_sz, clip_ratio, clip_value: Stored on ``self``.
        value_coef, entropy_coef, traj_len, batch_sz, discount,
        gae_lambda, clip_rewards, clip_grads_norm, normalize_returns,
        normalize_advantages: Settings whose defaults are read from
            ``DEFAULTS``.
        **kwargs: Only two keys are read here: ``args`` (defaults to
            ``None``; passed to ``SyncRunningAgent.__init__``) and
            ``subenvs`` (defaults to ``[]``; stored on ``self`` and
            forwarded to ``ActorCriticAgent.__init__``).
    """
    # NOTE: `args` and `subenvs` keep their original names and are bound
    # before locals() is read, so the snapshot below sees exactly the
    # same set of names as before.  Do not rename or reorder.
    args = kwargs.get('args')  # include the experimental args
    self.subenvs = subenvs = kwargs.get('subenvs', [])  # include specified subenvs

    # Rebinds `kwargs`: from here on it holds only the DEFAULTS overrides.
    kwargs = {
        k: v
        for k, v in locals().items()
        if k in DEFAULTS and DEFAULTS[k] != v
    }
    kwargs['subenvs'] = subenvs

    self.n_epochs = n_epochs
    self.minibatch_sz = minibatch_sz
    self.clip_ratio = clip_ratio
    self.clip_value = clip_value

    SyncRunningAgent.__init__(self, n_envs, args)
    ActorCriticAgent.__init__(self, obs_spec, act_spec, sess_mgr=sess_mgr, **kwargs)
    # The logger uses self.sess_mgr (not the raw argument), so it is
    # created only after the base initialisers have run.
    self.logger = StreamLogger(n_envs=n_envs, log_freq=10, sess_mgr=self.sess_mgr)

    # self.start_step is not assigned in this method; presumably one of
    # the base initialisers sets it -- confirm.  It is rescaled here by
    # the number of epochs.
    self.start_step = self.start_step // self.n_epochs