Example #1
    def __init__(
        self,
        obs_spec: Spec,
        act_spec: Spec,
        model_fn: ModelBuilder = None,
        policy_cls: PolicyType = None,
        sess_mgr: SessionManager = None,
        optimizer: tf.train.Optimizer = None,
        n_envs=1,
        value_coef=DEFAULTS['value_coef'],
        entropy_coef=DEFAULTS['entropy_coef'],
        traj_len=DEFAULTS['traj_len'],
        batch_sz=DEFAULTS['batch_sz'],
        discount=DEFAULTS['discount'],
        gae_lambda=DEFAULTS['gae_lambda'],
        clip_rewards=DEFAULTS['clip_rewards'],
        clip_grads_norm=DEFAULTS['clip_grads_norm'],
        normalize_returns=DEFAULTS['normalize_returns'],
        normalize_advantages=DEFAULTS['normalize_advantages'],
    ):
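        # keep only the arguments the caller explicitly overrode from DEFAULTS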
        kwargs = {
            k: v
            for k, v in locals().items() if k in DEFAULTS and DEFAULTS[k] != v
        }

        SyncRunningAgent.__init__(self, n_envs)
        ActorCriticAgent.__init__(self,
                                  obs_spec,
                                  act_spec,
                                  sess_mgr=sess_mgr,
                                  **kwargs)
        self.logger = StreamLogger(n_envs=n_envs,
                                   log_freq=10,
                                   sess_mgr=self.sess_mgr)
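Both constructors use the same trick: locals() is filtered against DEFAULTS so that only explicitly overridden hyperparameters are forwarded, letting ActorCriticAgent presumably fall back to its DEFAULTS for everything else. A minimal, standalone sketch of the pattern (the DEFAULTS dict and make_overrides function below are illustrative, not part of the library):

DEFAULTS = dict(value_coef=0.5, entropy_coef=0.01, discount=0.99)

def make_overrides(value_coef=DEFAULTS['value_coef'],
                   entropy_coef=DEFAULTS['entropy_coef'],
                   discount=DEFAULTS['discount']):
    # locals() here contains exactly the parameters: the comprehension's
    # iterable is evaluated in this frame, so the filter sees them all
    return {
        k: v
        for k, v in locals().items() if k in DEFAULTS and DEFAULTS[k] != v
    }

print(make_overrides(discount=0.9))  # {'discount': 0.9}
print(make_overrides())              # {}

Passing a value equal to its default produces no entry, which is exactly the behavior the __init__ above relies on.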
Example #2
    def __init__(
        self,
        obs_spec: Spec,
        act_spec: Spec,
        model_fn: ModelBuilder = None,
        policy_cls: PolicyType = None,
        sess_mgr: SessionManager = None,
        optimizer: tf.train.Optimizer = None,
        n_envs=4,
        n_epochs=3,
        minibatch_sz=128,
        clip_ratio=0.2,
        clip_value=0.5,
        value_coef=DEFAULTS['value_coef'],
        entropy_coef=DEFAULTS['entropy_coef'],
        traj_len=DEFAULTS['traj_len'],
        batch_sz=DEFAULTS['batch_sz'],
        discount=DEFAULTS['discount'],
        gae_lambda=DEFAULTS['gae_lambda'],
        clip_rewards=DEFAULTS['clip_rewards'],
        clip_grads_norm=DEFAULTS['clip_grads_norm'],
        normalize_returns=DEFAULTS['normalize_returns'],
        normalize_advantages=DEFAULTS['normalize_advantages'],
        **kwargs,
    ):
        args = kwargs.get('args')  # include the experimental args
        self.subenvs = subenvs = kwargs.get('subenvs', [])  # include the specified subenvs

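        # same override filter as in Example #1; note it replaces the incoming kwargs entirely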
        kwargs = {
            k: v
            for k, v in locals().items() if k in DEFAULTS and DEFAULTS[k] != v
        }
        kwargs['subenvs'] = subenvs  # ensure subenvs is always forwarded, since the filter above may drop it

        self.n_epochs = n_epochs
        self.minibatch_sz = minibatch_sz
        self.clip_ratio = clip_ratio
        self.clip_value = clip_value

        SyncRunningAgent.__init__(self, n_envs, args)
        ActorCriticAgent.__init__(self,
                                  obs_spec,
                                  act_spec,
                                  sess_mgr=sess_mgr,
                                  **kwargs)
        self.logger = StreamLogger(n_envs=n_envs,
                                   log_freq=10,
                                   sess_mgr=self.sess_mgr)

        # account for the fact that each collected batch now yields n_epochs optimizer steps
        self.start_step = self.start_step // self.n_epochs
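The PPO-specific parameters govern how each collected batch is consumed: the batch is reshuffled every epoch and split into minibatches, so one batch yields n_epochs * (batch_sz // minibatch_sz) gradient steps, which is presumably why start_step is divided by n_epochs when resuming. A rough, library-independent sketch of that schedule (minibatch_passes is a hypothetical helper, not part of this codebase):

import numpy as np

def minibatch_passes(batch_sz=512, minibatch_sz=128, n_epochs=3, seed=0):
    # yield one index slice per gradient step: each epoch reshuffles the
    # batch and splits it into batch_sz // minibatch_sz minibatches
    rng = np.random.default_rng(seed)
    for _ in range(n_epochs):
        idx = rng.permutation(batch_sz)
        for start in range(0, batch_sz, minibatch_sz):
            yield idx[start:start + minibatch_sz]

# 3 epochs x 4 minibatches = 12 gradient steps per collected batch
print(sum(1 for _ in minibatch_passes()))  # 12

clip_ratio is the epsilon in PPO's clipped surrogate objective; clip_value presumably plays the analogous role for the value-function update.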