Example #1
    def __init__(
            self,
            env,
            name,
            training=True,
            gamma=0.99,
            batch_size=32,
            model_type='dense',
            model_params=None,
            step_size=1,  # only > 1 if model_type is 'lstm'.
            layer_sizes=[32, 32],
            double_q=True,
            dueling=True):
        """
        model_params: 'layer_sizes', 'step_size', 'lstm_layers', 'lstm_size'
        """
        Policy.__init__(self, env, name, gamma=gamma, training=training)
        BaseModelMixin.__init__(self, name)

        assert isinstance(self.env.action_space, Discrete)
        assert isinstance(self.env.observation_space, Box)
        assert model_type in ('dense', 'conv', 'lstm')
        assert step_size == 1 or model_type == 'lstm'

        self.gamma = gamma
        self.batch_size = batch_size
        self.training = training
        self.model_type = model_type
        self.model_params = model_params or {}
        self.layer_sizes = layer_sizes
        self.step_size = step_size
        self.double_q = double_q
        self.dueling = dueling
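A minimal instantiation sketch for this example. The class name `DqnPolicy` is hypothetical (the snippet omits the class statement) and `gym` is assumed for the environment; the asserts above require a Discrete action space and Box observations, and a step_size above 1 is only valid together with the 'lstm' model type.

    # Hypothetical usage; `DqnPolicy` is an assumed name, not shown in the snippet.
    import gym

    env = gym.make('CartPole-v1')   # Discrete actions, Box observations.
    policy = DqnPolicy(env, 'dqn-cartpole',
                       training=True,
                       model_type='lstm',
                       step_size=8,          # > 1 is only allowed with model_type='lstm'.
                       double_q=True,
                       dueling=True)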
Example #2
    def __init__(self,
                 env,
                 name,
                 training=True,
                 gamma=0.99,
                 lam=0.95,
                 actor_layers=[64, 32],
                 critic_layers=[128, 64],
                 clip_norm=None,
                 **kwargs):
        Policy.__init__(self,
                        env,
                        name,
                        training=training,
                        gamma=gamma,
                        **kwargs)
        BaseModelMixin.__init__(self, name)

        assert isinstance(self.env.action_space, Discrete), \
            "Current PPOPolicy implementation only works for discrete action space."

        self.lam = lam  # lambda for GAE.
        self.actor_layers = actor_layers
        self.critic_layers = critic_layers
        self.clip_norm = clip_norm
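The `lam` argument is the lambda of Generalized Advantage Estimation; the estimator itself is not part of this snippet. A generic, self-contained sketch of GAE with the same roles for `gamma` and `lam` (not the repo's implementation):

    # delta_t = r_t + gamma * V(s_{t+1}) - V(s_t);  A_t = sum_l (gamma * lam)^l * delta_{t+l}.
    def gae_advantages(rewards, values, gamma=0.99, lam=0.95):
        advantages = [0.0] * len(rewards)
        gae = 0.0
        for t in reversed(range(len(rewards))):
            next_value = values[t + 1] if t + 1 < len(values) else 0.0
            delta = rewards[t] + gamma * next_value - values[t]
            gae = delta + gamma * lam * gae      # accumulate backwards in time
            advantages[t] = gae
        return advantages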
Example #3
    def __init__(self,
                 env,
                 name,
                 training=True,
                 gamma=0.9,
                 lr_a=0.02,
                 lr_a_decay=0.995,
                 lr_c=0.01,
                 lr_c_decay=0.995,
                 epsilon=1.0,
                 epsilon_final=0.05,
                 batch_size=32,
                 layer_sizes=None,
                 grad_clip_norm=None,
                 **kwargs):
        Policy.__init__(self,
                        env,
                        name,
                        training=training,
                        gamma=gamma,
                        **kwargs)
        BaseTFModelMixin.__init__(self, name)

        self.lr_a = lr_a
        self.lr_a_decay = lr_a_decay
        self.lr_c = lr_c
        self.lr_c_decay = lr_c_decay
        self.epsilon = epsilon
        self.epsilon_final = epsilon_final

        self.batch_size = batch_size
        self.layer_sizes = [64] if layer_sizes is None else layer_sizes
        self.grad_clip_norm = grad_clip_norm

        self.memory = ReplayMemory(tuple_class=Record)
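How `epsilon` is annealed towards `epsilon_final`, and how `lr_a_decay`/`lr_c_decay` are applied, is not shown here. A common pattern, stated purely as an assumption, is a linear anneal for exploration and multiplicative per-episode decay for the learning rates:

    # Assumed schedules; the constructor above only stores the hyperparameters.
    def annealed_epsilon(step, total_steps, eps_init=1.0, eps_final=0.05):
        frac = min(step / float(total_steps), 1.0)
        return eps_init + frac * (eps_final - eps_init)

    def decayed_lr(lr_init, decay, episode):
        return lr_init * (decay ** episode)      # e.g. lr_a * lr_a_decay ** episode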
Example #4
    def __init__(self, env, name, training=True, gamma=0.99,
                 layer_sizes=[32, 32], baseline=False):
        Policy.__init__(self, env, name, training=training, gamma=gamma)
        BaseModelMixin.__init__(self, name)

        self.layer_sizes = layer_sizes
        self.baseline = baseline
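This is the leanest constructor in the set: it only keeps the layer sizes and a `baseline` flag, which in a REINFORCE-style policy gradient typically toggles subtracting a learned state-value baseline from the discounted return to reduce variance. For illustration only, a generic discounted-return helper such a policy would need (not taken from the example):

    # G_t = r_t + gamma * G_{t+1}, computed backwards over one episode.
    def discounted_returns(rewards, gamma=0.99):
        returns = [0.0] * len(rewards)
        running = 0.0
        for t in reversed(range(len(rewards))):
            running = rewards[t] + gamma * running
            returns[t] = running
        return returns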
Example #5
    def __init__(
            self,
            env,
            name,
            training=True,
            gamma=0.99,
            lr=0.001,
            lr_decay=1.0,
            epsilon=1.0,
            epsilon_final=0.01,
            batch_size=64,
            memory_capacity=100000,
            model_type='dense',
            model_params=None,
            step_size=1,  # only > 1 if model_type is 'lstm'.
            layer_sizes=None,  # [32, 32] by default.
            target_update_type='hard',
            target_update_params=None,
            double_q=True,
            dueling=True):
        """
        model_params: 'layer_sizes', 'step_size', 'lstm_layers', 'lstm_size'
        """
        Policy.__init__(self, env, name, gamma=gamma, training=training)
        BaseTFModelMixin.__init__(self, name, saver_max_to_keep=5)

        assert isinstance(self.env.action_space, Discrete)
        assert isinstance(self.env.observation_space, Box)
        assert model_type in ('dense', 'conv', 'lstm')
        assert step_size == 1 or model_type == 'lstm'
        assert target_update_type in ('hard', 'soft')

        self.gamma = gamma
        self.lr = lr
        self.lr_decay = lr_decay
        self.epsilon = epsilon
        self.epsilon_final = epsilon_final
        self.training = training

        self.model_type = model_type
        self.model_params = model_params or {}
        self.layer_sizes = layer_sizes or [32, 32]
        self.step_size = step_size
        self.double_q = double_q
        self.dueling = dueling

        self.target_update_type = target_update_type
        self.target_update_every_step = (target_update_params
                                         or {}).get('every_step', 100)
        self.target_update_tau = (target_update_params or {}).get('tau', 0.05)

        if self.model_type == 'lstm':
            self.memory = ReplayTrajMemory(capacity=memory_capacity,
                                           step_size=step_size)
        else:
            self.memory = ReplayMemory(capacity=memory_capacity)

        self.batch_size = batch_size
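The constructor reads `every_step` and `tau` out of `target_update_params`, but the update itself is not shown. A rough sketch of the two modes the assert allows, assuming the weights are plain numpy arrays ('hard' copies the online weights every `every_step` steps, 'soft' blends them each step by `tau`):

    # Assumed target-network update logic; the actual graph ops are not in the snippet.
    def update_target(online_weights, target_weights, mode, step, every_step=100, tau=0.05):
        if mode == 'hard':
            if step % every_step == 0:
                return [w.copy() for w in online_weights]   # full copy
            return target_weights
        # 'soft': exponential moving average of the online weights.
        return [tau * w + (1.0 - tau) * t for w, t in zip(online_weights, target_weights)]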
Example #6
    def __init__(self, env, name, training=True, gamma=0.9, layer_sizes=None, clip_norm=None, **kwargs):
        Policy.__init__(self, env, name, training=training, gamma=gamma, **kwargs)
        BaseModelMixin.__init__(self, name)

        assert isinstance(self.env.action_space, Discrete), \
            "Current ActorCriticPolicy implementation only works for discrete action space."

        self.layer_sizes = [64] if layer_sizes is None else layer_sizes
        self.clip_norm = clip_norm
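The `clip_norm` argument suggests global-norm gradient clipping before the optimizer step; the snippet only stores the value. A typical TensorFlow 1.x wiring, shown strictly as an assumption:

    import tensorflow as tf   # TF 1.x style assumed.

    def clipped_train_op(optimizer, loss, clip_norm=None):
        # Assumed use of clip_norm; only the hyperparameter appears in the example.
        grads, vars_ = zip(*optimizer.compute_gradients(loss))
        if clip_norm is not None:
            grads, _ = tf.clip_by_global_norm(list(grads), clip_norm)
        return optimizer.apply_gradients(zip(grads, vars_))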
Example #7
    def __init__(self,
                 env,
                 name,
                 training=True,
                 gamma=0.99,
                 lr=0.001,
                 lr_decay=0.999,
                 batch_size=32,
                 layer_sizes=None,
                 baseline=False):
        Policy.__init__(self, env, name, training=training, gamma=gamma)
        BaseTFModelMixin.__init__(self, name)

        self.lr = lr
        self.lr_decay = lr_decay
        self.batch_size = batch_size
        self.layer_sizes = layer_sizes or [32, 32]
        self.baseline = baseline
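Compared with Example #4, this variant of the same REINFORCE-style constructor adds an explicit learning rate with multiplicative decay (`lr`, `lr_decay`), a minibatch size, and defaults `layer_sizes` to [32, 32] when none are given; the `baseline` flag plays the same role as above.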
Example #8
    def __init__(self,
                 env,
                 name,
                 training=True,
                 gamma=0.9,
                 actor_layers=[64, 32],
                 critic_layers=[128, 64],
                 **kwargs):
        Policy.__init__(self,
                        env,
                        name,
                        training=training,
                        gamma=gamma,
                        **kwargs)
        BaseModelMixin.__init__(self, name)

        assert isinstance(self.env.action_space, Box), \
            "Current DDPGPolicy implementation only works for continuous action space."

        self.actor_layers = actor_layers
        self.critic_layers = critic_layers
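A minimal instantiation sketch, with the usual caveats: the class name `DDPGPolicy` is taken from the assertion message rather than from a class statement, and `gym` is assumed; the assert requires a continuous (Box) action space.

    # Hypothetical usage; DDPG needs a continuous-control task.
    import gym

    env = gym.make('Pendulum-v1')   # Box (continuous) action space.
    policy = DDPGPolicy(env, 'ddpg-pendulum',
                        training=True,
                        gamma=0.9,
                        actor_layers=[64, 32],
                        critic_layers=[128, 64])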