def __init__(self,
             state_dim,
             action_spec,
             embedding_dim=256,
             hidden_dims=(256, 256),
             latent_dim=8,
             sequence_length=2,
             embed_state=False,
             action_only=False,
             learning_rate=None):
  """Creates networks.

  Args:
    state_dim: State size.
    action_spec: Action spec.
    embedding_dim: Embedding size.
    hidden_dims: List of hidden dimensions.
    latent_dim: Latent action dim.
    sequence_length: Context length.
    embed_state: Also embed state.
    action_only: Only input actions to trajectory embedder.
    learning_rate: Learning rate.
  """
  super().__init__()
  self.input_dim = state_dim
  self.latent_dim = latent_dim
  self.sequence_length = sequence_length
  self.action_only = action_only
  self.embed_state = embed_state

  self.embedder = StochasticEmbedNet(
      state_dim, embedding_dim=embedding_dim, hidden_dims=hidden_dims)
  self.prior = StochasticEmbedNet(
      embedding_dim if embed_state else state_dim,
      embedding_dim=latent_dim,
      hidden_dims=hidden_dims)
  self.primitive_policy = policies.DiagGuassianPolicy(
      state_dim + latent_dim, action_spec)

  action_dim = action_spec.shape[0]
  self.trajectory_embedder = StochasticRNNEmbedNet(
      [self.sequence_length, action_dim + (0 if action_only else state_dim)],
      embedding_dim=latent_dim)

  learning_rate = learning_rate or 1e-4
  self.optimizer = tf.keras.optimizers.Adam(
      learning_rate=learning_rate)  #, beta_1=0.0)

  self.all_variables = self.variables
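# Hedged illustration (not part of the source): StochasticEmbedNet and
# StochasticRNNEmbedNet are defined elsewhere, but the embedder/prior pairing above
# suggests reparameterized Gaussian latents. A minimal standalone sketch of that
# sampling step, assuming the networks emit a mean and log-variance:
import tensorflow as tf


def reparameterized_sample(mean, log_var):
  """Draws z = mean + std * eps with eps ~ N(0, I) (reparameterization trick)."""
  std = tf.exp(0.5 * log_var)
  eps = tf.random.normal(tf.shape(mean))
  return mean + std * eps


# e.g. a batch of 4 latent actions with latent_dim=8:
z = reparameterized_sample(tf.zeros([4, 8]), tf.zeros([4, 8]))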
def __init__(self, state_dim, action_spec, hidden_dims=(256, 256)):
  """Creates networks.

  Args:
    state_dim: State size.
    action_spec: Action specification.
    hidden_dims: List of hidden dimensions.
  """
  super().__init__()
  self.value = CriticNet(state_dim, action_dim=None, hidden_dims=hidden_dims)
  self.advantage = policies.DiagGuassianPolicy(
      state_dim, action_spec, hidden_dims=hidden_dims)
  self.log_alpha = tf.Variable(0.0, dtype=tf.float32, trainable=True)
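# The trainable log_alpha above is the usual way to learn a strictly positive
# temperature without constrained optimization: train log(alpha) freely and
# exponentiate wherever it is used. Minimal sketch (an assumption about usage;
# the training step itself is not shown here):
import tensorflow as tf

log_alpha = tf.Variable(0.0, dtype=tf.float32, trainable=True)
alpha = tf.exp(log_alpha)  # always > 0; starts at exp(0.0) = 1.0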
def __init__(self,
             state_dim,
             action_spec,
             mixture=False,
             hidden_dims=(256, 256),
             embed_model=None,
             finetune=False):
  """Creates networks.

  Args:
    state_dim: State size.
    action_spec: Action spec.
    mixture: Whether the policy is a mixture of Gaussians.
    hidden_dims: List of hidden dimensions.
    embed_model: Pretrained embedder.
    finetune: Whether to finetune the pretrained embedder.
  """
  self.action_spec = action_spec
  self.embed_model = embed_model
  self.finetune = finetune
  input_state_dim = (
      self.embed_model.get_input_state_dim()
      if self.embed_model else state_dim)

  if mixture:
    self.policy = policies.MixtureGuassianPolicy(
        input_state_dim, action_spec, hidden_dims=hidden_dims)
  else:
    self.policy = policies.DiagGuassianPolicy(
        input_state_dim, action_spec, hidden_dims=hidden_dims)

  boundaries = [180_000, 190_000]
  values = [1e-3, 1e-4, 1e-5]
  learning_rate_fn = tf.keras.optimizers.schedules.PiecewiseConstantDecay(
      boundaries, values)
  self.optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate_fn)

  self.log_alpha = tf.Variable(tf.math.log(1.0), trainable=True)
  self.alpha_optimizer = tf.keras.optimizers.Adam(
      learning_rate=learning_rate_fn)
  self.target_entropy = -self.action_spec.shape[0]
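# The PiecewiseConstantDecay above steps the learning rate down late in training.
# A small standalone check of the schedule it defines (Keras optimizers query the
# schedule with the current optimizer step):
import tensorflow as tf

schedule = tf.keras.optimizers.schedules.PiecewiseConstantDecay(
    boundaries=[180_000, 190_000], values=[1e-3, 1e-4, 1e-5])
for step in [0, 180_000, 185_000, 195_000]:
  print(step, float(schedule(step)))  # -> 1e-3, 1e-3, 1e-4, 1e-5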
def __init__(self,
             state_dim,
             action_spec,
             embedding_dim=256,
             fourier_dim=None,
             sequence_length=2,
             hidden_dims=(256, 256),
             shuffle_rate=0.1,
             mixup_rate=0.,
             kl_regularizer=None,
             learning_rate=None):
  """Creates networks.

  Args:
    state_dim: State size.
    action_spec: Action spec.
    embedding_dim: Embedding size.
    fourier_dim: Fourier feature size.
    sequence_length: Context length.
    hidden_dims: List of hidden dimensions.
    shuffle_rate: Rate of shuffled embeddings.
    mixup_rate: Rate of mixup embeddings.
    kl_regularizer: Apply uniform KL to action decoder.
    learning_rate: Learning rate.
  """
  super().__init__()
  self.state_dim = state_dim
  self.action_dim = action_spec.shape[0]
  self.embedding_dim = embedding_dim
  self.fourier_dim = fourier_dim
  self.latent_dim = self.fourier_dim or self.embedding_dim
  self.sequence_length = sequence_length
  self.shuffle_rate = shuffle_rate
  self.mixup_rate = mixup_rate
  self.kl_regularizer = kl_regularizer

  self.embedder = EmbedNet(
      self.state_dim +
      (self.action_dim if self.sequence_length == 2 else self.embedding_dim),
      embedding_dim=self.embedding_dim,
      hidden_dims=hidden_dims)
  self.next_embedder = EmbedNet(
      state_dim, embedding_dim=self.embedding_dim, hidden_dims=hidden_dims)
  self.trajectory_embedder = RNNEmbedNet(
      [self.sequence_length, self.action_dim + state_dim],
      embedding_dim=self.embedding_dim)
  self.primitive_policy = policies.DiagGuassianPolicy(
      state_dim + (self.fourier_dim or self.embedding_dim),
      action_spec,
      hidden_dims=hidden_dims)

  learning_rate = learning_rate or 3e-4
  self.optimizer = tf.keras.optimizers.Adam(
      learning_rate=learning_rate)  #, beta_1=0.0)
  self.log_alpha = tf.Variable(tf.math.log(1.0), trainable=True)
  self.target_entropy = -action_spec.shape[0]

  if self.fourier_dim:
    # Random Fourier feature parameters and running moments; fixed, not trained.
    self.omega = tf.Variable(
        tf.random.normal([self.fourier_dim, self.embedding_dim]),
        trainable=False)
    self.shift = tf.Variable(
        tf.random.uniform([self.fourier_dim], minval=0, maxval=2 * 3.14159),
        trainable=False)
    self.average_embed = tf.Variable(
        tf.zeros([self.embedding_dim]), trainable=False)
    self.average_square = tf.Variable(
        tf.ones([self.embedding_dim]), trainable=False)

  self.pretrain_variables = (
      self.embedder.variables + self.next_embedder.variables +
      self.primitive_policy.variables + self.trajectory_embedder.variables +
      [self.log_alpha])
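# Hedged sketch (an assumption, not the class's actual method, which is not shown
# here): with omega of shape [fourier_dim, embedding_dim] and shift of shape
# [fourier_dim], the standard random-Fourier-feature map of an embedding z is
# cos(omega z + shift); average_embed / average_square look like running first and
# second moments used to standardize z first. A standalone version:
import tensorflow as tf


def fourier_features(embed, omega, shift, average_embed, average_square):
  variance = average_square - tf.square(average_embed)  # E[z^2] - E[z]^2
  normalized = (embed - average_embed) / tf.sqrt(variance + 1e-8)
  projected = tf.matmul(normalized, omega, transpose_b=True) + shift
  scale = tf.sqrt(2.0 / tf.cast(tf.shape(shift)[0], tf.float32))
  return scale * tf.cos(projected)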
def __init__(self,
             state_dim,
             action_spec,
             actor_lr=3e-4,
             critic_lr=3e-4,
             alpha_lr=3e-4,
             discount=0.99,
             tau=0.005,
             target_update_period=1,
             target_entropy=0.0,
             cross_norm=False,
             pcl_actor_update=False,
             embed_model=None,
             other_embed_model=None,
             network='default',
             finetune=False):
  """Creates networks.

  Args:
    state_dim: State size.
    action_spec: Action spec.
    actor_lr: Actor learning rate.
    critic_lr: Critic learning rate.
    alpha_lr: Temperature learning rate.
    discount: MDP discount.
    tau: Soft target update parameter.
    target_update_period: Target network update period.
    target_entropy: Target entropy.
    cross_norm: Whether to fit cross norm critic.
    pcl_actor_update: Whether to use PCL actor update.
    embed_model: Pretrained embedder.
    other_embed_model: Pretrained embedder. Used for critic if specified.
    network: Type of actor/critic net.
    finetune: Whether to finetune the pretrained embedder.
  """
  self.action_spec = action_spec
  self.embed_model = embed_model
  self.other_embed_model = other_embed_model or embed_model
  self.finetune = finetune
  input_state_dim = (
      self.embed_model.get_input_state_dim()
      if self.embed_model else state_dim)

  if cross_norm:
    beta_1 = 0.0
  else:
    beta_1 = 0.9

  hidden_dims = ([] if network == 'none' else
                 (256,) if network == 'small' else (256, 256))
  self.actor = policies.DiagGuassianPolicy(
      input_state_dim, action_spec, hidden_dims=hidden_dims)
  self.actor_optimizer = tf.keras.optimizers.Adam(
      learning_rate=actor_lr, beta_1=beta_1)

  self.log_alpha = tf.Variable(tf.math.log(0.1), trainable=True)
  self.alpha_optimizer = tf.keras.optimizers.Adam(
      learning_rate=alpha_lr, beta_1=beta_1)

  if cross_norm:
    assert network == 'default'
    self.critic_learner = critic.CrossNormCriticLearner(
        input_state_dim, action_spec.shape[0], critic_lr, discount, tau)
  else:
    self.critic_learner = critic.CriticLearner(
        input_state_dim,
        action_spec.shape[0],
        critic_lr,
        discount,
        tau,
        target_update_period,
        hidden_dims=hidden_dims)

  self.target_entropy = target_entropy
  self.discount = discount

  self.pcl_actor_update = pcl_actor_update
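# Hedged sketch (an assumption; the update step is not shown here): log_alpha and
# target_entropy above are the ingredients of the standard SAC temperature loss,
# which adapts alpha so the policy entropy tracks target_entropy:
import tensorflow as tf


def alpha_loss(log_alpha, action_log_probs, target_entropy):
  """Standard SAC temperature objective: E[alpha * (-log_pi - target_entropy)]."""
  alpha = tf.exp(log_alpha)
  return tf.reduce_mean(
      alpha * (-tf.stop_gradient(action_log_probs) - target_entropy))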
def __init__(self,
             state_dim,
             action_spec,
             actor_lr=3e-5,
             critic_lr=3e-4,
             alpha_lr=1e-4,
             discount=0.99,
             tau=0.005,
             target_entropy=0.0,
             bc_alpha=1.0,
             embed_model=None,
             other_embed_model=None,
             bc_embed_model=None,
             network='default',
             finetune=False):
  """Creates networks.

  Args:
    state_dim: State size.
    action_spec: Action spec.
    actor_lr: Actor learning rate.
    critic_lr: Critic learning rate.
    alpha_lr: Temperature learning rate.
    discount: MDP discount.
    tau: Soft target update parameter.
    target_entropy: Target entropy.
    bc_alpha: Policy regularization weight.
    embed_model: Pretrained embedder.
    other_embed_model: Pretrained embedder. Used for critic if specified.
    bc_embed_model: Pretrained embedder. Used for behavior cloning if
      specified.
    network: Type of actor/critic net.
    finetune: Whether to finetune the pretrained embedder.
  """
  self.action_spec = action_spec
  self.embed_model = embed_model
  self.other_embed_model = other_embed_model or embed_model
  self.finetune = finetune
  input_state_dim = (
      self.embed_model.get_input_state_dim()
      if self.embed_model else state_dim)

  hidden_dims = ([] if network == 'none' else
                 (256,) if network == 'small' else (256, 256))
  self.actor = policies.DiagGuassianPolicy(
      input_state_dim, action_spec, hidden_dims=hidden_dims)
  self.actor_optimizer = tf.keras.optimizers.Adam(learning_rate=actor_lr)

  self.log_alpha = tf.Variable(tf.math.log(0.1), trainable=True)
  self.alpha_optimizer = tf.keras.optimizers.Adam(learning_rate=alpha_lr)

  self.target_entropy = target_entropy
  self.discount = discount
  self.tau = tau

  self.bc = behavioral_cloning.BehavioralCloning(
      state_dim,
      action_spec,
      mixture=True,
      hidden_dims=hidden_dims,
      embed_model=bc_embed_model or self.embed_model,
      finetune=self.finetune)
  self.bc_alpha = bc_alpha

  action_dim = action_spec.shape[0]
  self.critic = critic.Critic(
      input_state_dim, action_dim, hidden_dims=hidden_dims)
  self.critic_target = critic.Critic(
      input_state_dim, action_dim, hidden_dims=hidden_dims)
  # tau=1.0 initializes the target critic as an exact copy of the online critic.
  critic.soft_update(self.critic, self.critic_target, tau=1.0)
  self.critic_optimizer = tf.keras.optimizers.Adam(learning_rate=critic_lr)
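# Hedged sketch (an assumption; critic.soft_update's body is not shown here): a
# soft/Polyak update blends each target variable toward its online counterpart, so
# the tau=1.0 call above is a hard copy, while tau=0.005 would give the usual
# slow-moving target updates:
import tensorflow as tf


def soft_update_sketch(net, target_net, tau=0.005):
  for var, target_var in zip(net.variables, target_net.variables):
    target_var.assign(tau * var + (1.0 - tau) * target_var)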