def __init__(self,
             env,
             name,
             training=True,
             gamma=0.99,
             batch_size=32,
             model_type='dense',
             model_params=None,
             step_size=1,  # only > 1 if model_type is 'lstm'.
             layer_sizes=[32, 32],
             double_q=True,
             dueling=True):
    """
    model_params: 'layer_sizes', 'step_size', 'lstm_layers', 'lstm_size'
    """
    Policy.__init__(self, env, name, gamma=gamma, training=training)
    BaseModelMixin.__init__(self, name)

    assert isinstance(self.env.action_space, Discrete)
    assert isinstance(self.env.observation_space, Box)
    assert model_type in ('dense', 'conv', 'lstm')
    assert step_size == 1 or model_type == 'lstm'

    self.gamma = gamma
    self.batch_size = batch_size
    self.training = training
    self.model_type = model_type
    self.model_params = model_params or {}
    self.layer_sizes = layer_sizes
    self.step_size = step_size
    self.double_q = double_q
    self.dueling = dueling
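
# A minimal NumPy sketch of what the `dueling` flag above typically enables:
# the Q-value head is split into a state-value stream V(s) and an advantage
# stream A(s, a), recombined as Q(s, a) = V(s) + A(s, a) - mean_a A(s, a).
# The helper name `dueling_q_values` is hypothetical and only for illustration;
# the actual network wiring lives elsewhere in the class.
import numpy as np

def dueling_q_values(state_value, advantages):
    # state_value: shape (batch, 1); advantages: shape (batch, n_actions).
    return state_value + (advantages - advantages.mean(axis=1, keepdims=True))

# Example: a batch of 2 states with 3 discrete actions.
v = np.array([[1.0], [0.5]])
a = np.array([[0.2, -0.1, 0.5], [0.0, 0.3, -0.3]])
q = dueling_q_values(v, a)  # shape (2, 3)
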
def __init__(self, env, name, training=True, gamma=0.99, lam=0.95,
             actor_layers=[64, 32], critic_layers=[128, 64],
             clip_norm=None, **kwargs):
    Policy.__init__(self, env, name, training=training, gamma=gamma, **kwargs)
    BaseModelMixin.__init__(self, name)

    assert isinstance(self.env.action_space, Discrete), \
        "Current PPOPolicy implementation only works for discrete action space."

    self.lam = lam  # lambda for GAE.
    self.actor_layers = actor_layers
    self.critic_layers = critic_layers
    self.clip_norm = clip_norm
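
# `lam` above is the lambda of Generalized Advantage Estimation (GAE). A
# minimal NumPy sketch of the estimator, assuming one complete episode of
# rewards and value predictions; the helper name `compute_gae` is hypothetical.
#   delta_t = r_t + gamma * V(s_{t+1}) - V(s_t)
#   A_t     = delta_t + gamma * lam * A_{t+1}
import numpy as np

def compute_gae(rewards, values, gamma=0.99, lam=0.95):
    # `values` has one extra entry for the state after the last reward
    # (0.0 if the episode terminated).
    advantages = np.zeros(len(rewards))
    gae = 0.0
    for t in reversed(range(len(rewards))):
        delta = rewards[t] + gamma * values[t + 1] - values[t]
        gae = delta + gamma * lam * gae
        advantages[t] = gae
    return advantages

# Example: a 3-step episode.
adv = compute_gae([1.0, 0.0, 1.0], [0.5, 0.4, 0.6, 0.0])
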
def __init__(self, env, name, training=True, gamma=0.9,
             lr_a=0.02, lr_a_decay=0.995,
             lr_c=0.01, lr_c_decay=0.995,
             epsilon=1.0, epsilon_final=0.05,
             batch_size=32, layer_sizes=None,
             grad_clip_norm=None, **kwargs):
    Policy.__init__(self, env, name, training=training, gamma=gamma, **kwargs)
    BaseTFModelMixin.__init__(self, name)

    self.lr_a = lr_a
    self.lr_a_decay = lr_a_decay
    self.lr_c = lr_c
    self.lr_c_decay = lr_c_decay
    self.epsilon = epsilon
    self.epsilon_final = epsilon_final
    self.batch_size = batch_size
    self.layer_sizes = [64] if layer_sizes is None else layer_sizes
    self.grad_clip_norm = grad_clip_norm

    self.memory = ReplayMemory(tuple_class=Record)
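
# The paired value/decay arguments above suggest exponentially decayed learning
# rates for the actor and critic, with epsilon annealed from `epsilon` down to
# `epsilon_final`. A minimal sketch of one plausible per-episode schedule; the
# helper name `decayed` and the per-episode application are assumptions, as the
# real schedule lives in the training loop elsewhere in the class.
def decayed(initial, decay, episode, floor=0.0):
    return max(floor, initial * (decay ** episode))

# Example with the defaults above, after 100 episodes.
lr_a_100 = decayed(0.02, 0.995, 100)           # ~0.0121
eps_100 = decayed(1.0, 0.98, 100, floor=0.05)  # 0.98 is an illustrative rate,
                                               # floored at epsilon_final.
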
def __init__(self, env, name, training=True, gamma=0.99,
             layer_sizes=[32, 32], baseline=False):
    Policy.__init__(self, env, name, training=training, gamma=gamma)
    BaseModelMixin.__init__(self, name)

    self.layer_sizes = layer_sizes
    self.baseline = baseline
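
# `gamma` above discounts future rewards when turning an episode's rewards into
# returns, and `baseline` toggles subtracting a learned state-value estimate
# from those returns to reduce variance. A minimal NumPy sketch of the return
# computation; the helper name `discounted_returns` is hypothetical.
import numpy as np

def discounted_returns(rewards, gamma=0.99):
    returns = np.zeros(len(rewards))
    running = 0.0
    for t in reversed(range(len(rewards))):
        running = rewards[t] + gamma * running
        returns[t] = running
    return returns

# Example: G_0 = 1 + 0.99 * (0 + 0.99 * 1) = 1.9801
g = discounted_returns([1.0, 0.0, 1.0])
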
def __init__(self,
             env,
             name,
             training=True,
             gamma=0.99,
             lr=0.001,
             lr_decay=1.0,
             epsilon=1.0,
             epsilon_final=0.01,
             batch_size=64,
             memory_capacity=100000,
             model_type='dense',
             model_params=None,
             step_size=1,  # only > 1 if model_type is 'lstm'.
             layer_sizes=None,  # [32, 32] by default.
             target_update_type='hard',
             target_update_params=None,
             double_q=True,
             dueling=True):
    """
    model_params: 'layer_sizes', 'step_size', 'lstm_layers', 'lstm_size'
    """
    Policy.__init__(self, env, name, gamma=gamma, training=training)
    BaseTFModelMixin.__init__(self, name, saver_max_to_keep=5)

    assert isinstance(self.env.action_space, Discrete)
    assert isinstance(self.env.observation_space, Box)
    assert model_type in ('dense', 'conv', 'lstm')
    assert step_size == 1 or model_type == 'lstm'
    assert target_update_type in ('hard', 'soft')

    self.gamma = gamma
    self.lr = lr
    self.lr_decay = lr_decay
    self.epsilon = epsilon
    self.epsilon_final = epsilon_final
    self.training = training
    self.model_type = model_type
    self.model_params = model_params or {}
    self.layer_sizes = layer_sizes or [32, 32]
    self.step_size = step_size
    self.double_q = double_q
    self.dueling = dueling

    self.target_update_type = target_update_type
    self.target_update_every_step = (target_update_params or {}).get('every_step', 100)
    self.target_update_tau = (target_update_params or {}).get('tau', 0.05)

    if self.model_type == 'lstm':
        self.memory = ReplayTrajMemory(capacity=memory_capacity, step_size=step_size)
    else:
        self.memory = ReplayMemory(capacity=memory_capacity)

    self.batch_size = batch_size
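
# `target_update_type` above selects between a hard copy of the online weights
# into the target network every `every_step` training steps, and a soft
# (Polyak) update blended by `tau` on every step. A minimal NumPy sketch of
# both rules, assuming the weights are plain arrays; the helper names are
# hypothetical, not this class's methods.
import numpy as np

def hard_update(online_weights):
    # target <- online (done once every `every_step` steps).
    return [w.copy() for w in online_weights]

def soft_update(online_weights, target_weights, tau=0.05):
    # target <- tau * online + (1 - tau) * target (done every step).
    return [tau * w + (1.0 - tau) * t
            for w, t in zip(online_weights, target_weights)]
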
def __init__(self, env, name, training=True, gamma=0.9,
             layer_sizes=None, clip_norm=None, **kwargs):
    Policy.__init__(self, env, name, training=training, gamma=gamma, **kwargs)
    BaseModelMixin.__init__(self, name)

    assert isinstance(self.env.action_space, Discrete), \
        "Current ActorCriticPolicy implementation only works for discrete action space."

    self.layer_sizes = [64] if layer_sizes is None else layer_sizes
    self.clip_norm = clip_norm
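
# `clip_norm` above caps the global norm of the gradients before the optimizer
# applies them. A minimal NumPy sketch of global-norm clipping (the same rule
# as TensorFlow's clip_by_global_norm); the standalone `clip_by_global_norm`
# below is only for illustration, not the repo's actual code path.
import numpy as np

def clip_by_global_norm(grads, clip_norm):
    global_norm = np.sqrt(sum(np.sum(g ** 2) for g in grads))
    if global_norm <= clip_norm:
        return grads
    scale = clip_norm / global_norm
    return [g * scale for g in grads]

# Example: two gradient tensors with global norm 5.0, rescaled to norm 1.0.
clipped = clip_by_global_norm([np.array([3.0, 0.0]), np.array([0.0, 4.0])], 1.0)
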
def __init__(self, env, name, training=True, gamma=0.99,
             lr=0.001, lr_decay=0.999, batch_size=32,
             layer_sizes=None, baseline=False):
    Policy.__init__(self, env, name, training=training, gamma=gamma)
    BaseTFModelMixin.__init__(self, name)

    self.lr = lr
    self.lr_decay = lr_decay
    self.batch_size = batch_size
    self.layer_sizes = layer_sizes or [32, 32]
    self.baseline = baseline
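
# With `baseline=True`, the returns are typically centered by a state-value
# estimate before forming the policy-gradient loss,
# loss = -mean(log_prob * (return - baseline)). A minimal NumPy sketch of that
# loss; it is an assumption about this class's exact objective, shown only to
# make the `baseline` flag concrete.
import numpy as np

def pg_loss(log_probs, returns, baselines=None):
    advantages = returns - (baselines if baselines is not None else 0.0)
    return -np.mean(log_probs * advantages)

# Example with and without the baseline correction.
lp = np.array([-0.1, -0.7, -0.3])
ret = np.array([1.9801, 0.99, 1.0])
loss_plain = pg_loss(lp, ret)
loss_base = pg_loss(lp, ret, baselines=np.array([1.0, 0.8, 0.9]))
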
def __init__(self, env, name, training=True, gamma=0.9,
             actor_layers=[64, 32], critic_layers=[128, 64], **kwargs):
    Policy.__init__(self, env, name, training=training, gamma=gamma, **kwargs)
    BaseModelMixin.__init__(self, name)

    assert isinstance(self.env.action_space, Box), \
        "Current DDPGPolicy implementation only works for continuous action space."

    self.actor_layers = actor_layers
    self.critic_layers = critic_layers
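
# Usage sketch: DDPGPolicy (the name comes from the assertion message above)
# expects an environment with a continuous (Box) action space, e.g. Pendulum.
# The import path, the env id, and the instantiation below are assumptions for
# illustration only, so the policy construction is left commented out.
import gym
# from playground.policies import DDPGPolicy  # hypothetical import path

env = gym.make('Pendulum-v0')  # Box action space, so the assertion passes.
# policy = DDPGPolicy(env, 'ddpg-pendulum', training=True, gamma=0.9,
#                     actor_layers=[64, 32], critic_layers=[128, 64])
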