Example #1
    def __init__(
            self,
            obs_spec,
            act_spec,
            model_fn=build_mlp,
            policy_cls=MultiPolicy,
            sess_mgr=None,
            n_envs=4,
            traj_len=16,
            batch_sz=16,
            discount=0.99,
            gae_lambda=0.95,
            clip_rewards=0.0,
            normalize_advantages=True,
            bootstrap_terminals=False,
            clip_grads_norm=0.0,
            value_coef=0.5,
            entropy_coef=0.001,
            optimizer=tf.train.AdamOptimizer(),
            logger=Logger(),
    ):
        self.value_coef = value_coef
        self.entropy_coef = entropy_coef

        SyncRunningAgent.__init__(self, n_envs)
        ActorCriticAgent.__init__(self, obs_spec, act_spec, model_fn,
                                  policy_cls, sess_mgr, traj_len, batch_sz,
                                  discount, gae_lambda, clip_rewards,
                                  normalize_advantages, bootstrap_terminals,
                                  clip_grads_norm, optimizer, logger)
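
A constructor like this is usually called with just the environment specs, with every other hyperparameter left at its default. A minimal usage sketch, assuming a concrete agent class named A2CAgent composed from these mixins (the class name and the spec objects are hypothetical, only the keyword names come from the signature above):

    # Hypothetical usage of the constructor shown above.
    agent = A2CAgent(
        obs_spec,           # observation spec from the environment wrapper
        act_spec,           # action spec from the environment wrapper
        n_envs=8,           # run 8 synchronous environments instead of 4
        entropy_coef=0.01,  # stronger exploration bonus than the 0.001 default
    )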
Example #2
    def __init__(
        self,
        obs_spec: Spec,
        act_spec: Spec,
        model_fn: ModelBuilder = None,
        policy_cls: PolicyType = None,
        sess_mgr: SessionManager = None,
        optimizer: tf.train.Optimizer = None,
        n_envs=1,
        value_coef=DEFAULTS['value_coef'],
        entropy_coef=DEFAULTS['entropy_coef'],
        traj_len=DEFAULTS['traj_len'],
        batch_sz=DEFAULTS['batch_sz'],
        discount=DEFAULTS['discount'],
        gae_lambda=DEFAULTS['gae_lambda'],
        clip_rewards=DEFAULTS['clip_rewards'],
        clip_grads_norm=DEFAULTS['clip_grads_norm'],
        normalize_returns=DEFAULTS['normalize_returns'],
        normalize_advantages=DEFAULTS['normalize_advantages'],
    ):
        kwargs = {
            k: v
            for k, v in locals().items() if k in DEFAULTS and DEFAULTS[k] != v
        }

        SyncRunningAgent.__init__(self, n_envs)
        ActorCriticAgent.__init__(self,
                                  obs_spec,
                                  act_spec,
                                  sess_mgr=sess_mgr,
                                  **kwargs)
        self.logger = StreamLogger(n_envs=n_envs,
                                   log_freq=10,
                                   sess_mgr=self.sess_mgr)
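
The dict comprehension over locals() forwards only the hyperparameters the caller actually overrode, so the parent class keeps its own defaults authoritative. A self-contained sketch of that pattern (the DEFAULTS table and the function are illustrative, not the library's):

    DEFAULTS = {'discount': 0.99, 'gae_lambda': 0.95, 'traj_len': 16}

    def collect_overrides(discount=DEFAULTS['discount'],
                          gae_lambda=DEFAULTS['gae_lambda'],
                          traj_len=DEFAULTS['traj_len']):
        # Keep only the arguments whose value differs from the registered default.
        return {k: v for k, v in locals().items()
                if k in DEFAULTS and DEFAULTS[k] != v}

    print(collect_overrides(discount=0.9))  # {'discount': 0.9}
    print(collect_overrides())              # {}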
Example #3
    def __init__(
            self,
            obs_spec: Spec,
            act_spec: Spec,
            model_fn: ModelBuilder,
            policy_cls: PolicyType,
            sess_mgr: SessionManager = None,
            n_envs=4,
            traj_len=16,
            batch_sz=16,
            discount=0.99,
            gae_lambda=0.95,
            clip_rewards=0.0,
            normalize_advantages=True,
            clip_grads_norm=0.0,
            value_coef=0.5,
            entropy_coef=0.001,
            optimizer=tf.train.AdamOptimizer(),
            logger=Logger(),
    ):
        self.value_coef = value_coef
        self.entropy_coef = entropy_coef

        SyncRunningAgent.__init__(self, n_envs)
        ActorCriticAgent.__init__(self, obs_spec, act_spec, model_fn,
                                  policy_cls, sess_mgr, traj_len, batch_sz,
                                  discount, gae_lambda, clip_rewards,
                                  normalize_advantages, clip_grads_norm,
                                  optimizer, logger)
Example #4
    def __init__(
        self,
        obs_spec,
        act_spec,
        model_fn=build_mlp,
        policy_cls=MultiPolicy,
        sess_mgr=None,
        n_envs=4,
        traj_len=16,
        batch_sz=16,
        discount=0.99,
        gae_lambda=0.95,
        clip_rewards=0.0,
        normalize_advantages=True,
        bootstrap_terminals=False,
        clip_grads_norm=0.0,
        value_coef=0.5,
        entropy_coef=0.001,
        optimizer=tf.train.AdamOptimizer(),
        logger=Logger(),
    ):
        self.value_coef = value_coef
        self.entropy_coef = entropy_coef

        SyncRunningAgent.__init__(self, n_envs)
        ActorCriticAgent.__init__(
            self, obs_spec, act_spec, model_fn, policy_cls, sess_mgr, traj_len, batch_sz, discount,
            gae_lambda, clip_rewards, normalize_advantages, bootstrap_terminals, clip_grads_norm, optimizer, logger
        )
Example #5
    def __init__(
        self,
        obs_spec: Spec,
        act_spec: Spec,
        model_fn: ModelBuilder = None,
        policy_cls: PolicyType = None,
        sess_mgr: SessionManager = None,
        optimizer: tf.train.Optimizer = None,
        n_envs=4,
        n_epochs=3,
        minibatch_sz=128,
        clip_ratio=0.2,
        clip_value=0.5,
        value_coef=DEFAULTS['value_coef'],
        entropy_coef=DEFAULTS['entropy_coef'],
        traj_len=DEFAULTS['traj_len'],
        batch_sz=DEFAULTS['batch_sz'],
        discount=DEFAULTS['discount'],
        gae_lambda=DEFAULTS['gae_lambda'],
        clip_rewards=DEFAULTS['clip_rewards'],
        clip_grads_norm=DEFAULTS['clip_grads_norm'],
        normalize_returns=DEFAULTS['normalize_returns'],
        normalize_advantages=DEFAULTS['normalize_advantages'],
        **kwargs,
    ):
        # Pull the experimental extras out of **kwargs before locals() is inspected below.
        args = kwargs.get('args')  # include the experimental args
        self.subenvs = subenvs = kwargs.get('subenvs', [])  # include the specified subenvs

        kwargs = {
            k: v
            for k, v in locals().items() if k in DEFAULTS and DEFAULTS[k] != v
        }
        kwargs['subenvs'] = subenvs

        self.n_epochs = n_epochs
        self.minibatch_sz = minibatch_sz
        self.clip_ratio = clip_ratio
        self.clip_value = clip_value

        SyncRunningAgent.__init__(self, n_envs, args)
        ActorCriticAgent.__init__(self,
                                  obs_spec,
                                  act_spec,
                                  sess_mgr=sess_mgr,
                                  **kwargs)
        self.logger = StreamLogger(n_envs=n_envs,
                                   log_freq=10,
                                   sess_mgr=self.sess_mgr)

        self.start_step = self.start_step // self.n_epochs
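
Here n_epochs and minibatch_sz define the usual PPO training loop: each collected batch is replayed for several epochs in shuffled minibatches, and the start_step // n_epochs line appears to convert a restored optimizer-step count back into environment-batch units. A generic sketch of that minibatch iteration (the helper is illustrative, not the library's):

    import numpy as np

    def minibatch_indices(n_samples, minibatch_sz, n_epochs, rng=np.random):
        # Yield shuffled index slices: n_epochs full passes over the batch.
        for _ in range(n_epochs):
            order = rng.permutation(n_samples)
            for start in range(0, n_samples, minibatch_sz):
                yield order[start:start + minibatch_sz]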
Example #6
    def __init__(
            self,
            obs_spec: Spec,
            act_spec: Spec,
            model_fn: ModelBuilder,
            policy_cls: PolicyType,
            sess_mgr: SessionManager = None,
            n_envs=4,
            traj_len=16,
            batch_sz=16,
            discount=0.99,
            gae_lambda=0.95,
            clip_rewards=0.0,
            normalize_advantages=True,
            bootstrap_terminals=False,
            clip_grads_norm=0.0,
            n_updates=3,
            minibatch_sz=128,
            clip_ratio=0.2,
            value_coef=0.5,
            entropy_coef=0.001,
            optimizer=tf.train.AdamOptimizer(),
            logger=Logger(),
    ):
        self.n_updates = n_updates
        self.minibatch_sz = minibatch_sz
        self.clip_ratio = clip_ratio
        self.value_coef = value_coef
        self.entropy_coef = entropy_coef

        SyncRunningAgent.__init__(self, n_envs)
        ActorCriticAgent.__init__(self, obs_spec, act_spec, model_fn,
                                  policy_cls, sess_mgr, traj_len, batch_sz,
                                  discount, gae_lambda, clip_rewards,
                                  normalize_advantages, bootstrap_terminals,
                                  clip_grads_norm, optimizer, logger)

        self.start_step = self.start_step // self.n_updates
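
The clip_ratio argument in these PPO-style constructors corresponds to the epsilon of the clipped surrogate objective, L = E[min(r * A, clip(r, 1 - eps, 1 + eps) * A)]. A minimal NumPy sketch of that objective (the standard PPO formula, not the library's actual loss code):

    import numpy as np

    def clipped_surrogate(ratio, advantage, clip_ratio=0.2):
        # Gradient stops flowing through samples whose probability ratio has
        # already moved outside [1 - eps, 1 + eps] in the favored direction.
        unclipped = ratio * advantage
        clipped = np.clip(ratio, 1.0 - clip_ratio, 1.0 + clip_ratio) * advantage
        return np.minimum(unclipped, clipped).mean()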