Example #1
0
    def __init__(self,
                 state_dim,
                 action_spec,
                 embedding_dim=256,
                 hidden_dims=(256, 256),
                 latent_dim=8,
                 sequence_length=2,
                 embed_state=False,
                 action_only=False,
                 learning_rate=None):
        """Builds the embedder, prior, policy and trajectory-embedder nets.

    Args:
      state_dim: Dimensionality of environment states.
      action_spec: Spec describing the action space.
      embedding_dim: Size of the state embedding.
      hidden_dims: Hidden layer sizes shared by the sub-networks.
      latent_dim: Dimensionality of the latent action.
      sequence_length: Length of the trajectory context.
      embed_state: Whether the prior consumes embedded states.
      action_only: Whether the trajectory embedder sees only actions.
      learning_rate: Adam learning rate; defaults to 1e-4 when falsy.
    """
        super().__init__()
        self.input_dim = state_dim
        self.latent_dim = latent_dim
        self.sequence_length = sequence_length
        self.action_only = action_only
        self.embed_state = embed_state

        # Stochastic state embedder over raw states.
        self.embedder = StochasticEmbedNet(
            state_dim, embedding_dim=embedding_dim, hidden_dims=hidden_dims)

        # Prior over latents; input is embedded state when embed_state is set.
        prior_input_dim = embedding_dim if embed_state else state_dim
        self.prior = StochasticEmbedNet(
            prior_input_dim, embedding_dim=latent_dim, hidden_dims=hidden_dims)

        # Low-level policy conditioned on state concatenated with latent.
        self.primitive_policy = policies.DiagGuassianPolicy(
            state_dim + latent_dim, action_spec)

        action_dim = action_spec.shape[0]
        # Per-step context feature size for the trajectory embedder.
        step_dim = action_dim if action_only else action_dim + state_dim
        self.trajectory_embedder = StochasticRNNEmbedNet(
            [self.sequence_length, step_dim], embedding_dim=latent_dim)

        self.optimizer = tf.keras.optimizers.Adam(
            learning_rate=learning_rate if learning_rate else 1e-4)

        self.all_variables = self.variables
Example #2
0
    def __init__(self, state_dim, action_spec, hidden_dims=(256, 256)):
        """Builds the value net, advantage policy head and temperature.

    Args:
      state_dim: Dimensionality of environment states.
      action_spec: Spec describing the action space.
      hidden_dims: Hidden layer sizes for the sub-networks.
    """
        super().__init__()
        # State-only value network (no action input).
        self.value = CriticNet(
            state_dim, action_dim=None, hidden_dims=hidden_dims)

        # Advantage modeled via a diagonal-Gaussian policy head.
        self.advantage = policies.DiagGuassianPolicy(
            state_dim, action_spec, hidden_dims=hidden_dims)

        # Learnable log-temperature (log keeps alpha positive).
        self.log_alpha = tf.Variable(0.0, dtype=tf.float32, trainable=True)
    def __init__(self,
                 state_dim,
                 action_spec,
                 mixture=False,
                 hidden_dims=(256, 256),
                 embed_model=None,
                 finetune=False):
        """Builds the cloning policy, optimizers and temperature.

    Args:
      state_dim: Dimensionality of environment states.
      action_spec: Spec describing the action space.
      mixture: If True, use a mixture-of-Gaussians policy head.
      hidden_dims: Hidden layer sizes for the policy network.
      embed_model: Optional pretrained state embedder.
      finetune: Whether the pretrained embedder is finetuned.
    """
        self.action_spec = action_spec
        self.embed_model = embed_model
        self.finetune = finetune

        # When an embedder is supplied, the policy input matches its size.
        if self.embed_model:
            input_state_dim = self.embed_model.get_input_state_dim()
        else:
            input_state_dim = state_dim

        policy_cls = (policies.MixtureGuassianPolicy
                      if mixture else policies.DiagGuassianPolicy)
        self.policy = policy_cls(
            input_state_dim, action_spec, hidden_dims=hidden_dims)

        # Step-wise learning-rate decay: 1e-3 -> 1e-4 -> 1e-5.
        learning_rate_fn = tf.keras.optimizers.schedules.PiecewiseConstantDecay(
            [180_000, 190_000], [1e-3, 1e-4, 1e-5])
        self.optimizer = tf.keras.optimizers.Adam(
            learning_rate=learning_rate_fn)

        # Learnable log-temperature; entropy target is -|A|.
        self.log_alpha = tf.Variable(tf.math.log(1.0), trainable=True)
        self.alpha_optimizer = tf.keras.optimizers.Adam(
            learning_rate=learning_rate_fn)

        self.target_entropy = -self.action_spec.shape[0]
Example #4
0
    def __init__(self,
                 state_dim,
                 action_spec,
                 embedding_dim=256,
                 fourier_dim=None,
                 sequence_length=2,
                 hidden_dims=(256, 256),
                 shuffle_rate=0.1,
                 mixup_rate=0.,
                 kl_regularizer=None,
                 learning_rate=None):
        """Builds embedders, policy and optional Fourier-feature state.

    Args:
      state_dim: Dimensionality of environment states.
      action_spec: Spec describing the action space.
      embedding_dim: Size of the learned embedding.
      fourier_dim: Size of the random Fourier features, if used.
      sequence_length: Length of the trajectory context.
      hidden_dims: Hidden layer sizes shared by the sub-networks.
      shuffle_rate: Rate of shuffled embeddings.
      mixup_rate: Rate of mixup embeddings.
      kl_regularizer: Optional uniform-KL weight on the action decoder.
      learning_rate: Adam learning rate; defaults to 3e-4 when falsy.
    """
        super().__init__()
        self.state_dim = state_dim
        self.action_dim = action_spec.shape[0]
        self.embedding_dim = embedding_dim
        self.fourier_dim = fourier_dim
        # Latent size falls back to the embedding size without Fourier feats.
        self.latent_dim = self.fourier_dim or self.embedding_dim
        self.sequence_length = sequence_length
        self.shuffle_rate = shuffle_rate
        self.mixup_rate = mixup_rate
        self.kl_regularizer = kl_regularizer

        # Length-2 contexts feed raw actions to the embedder; longer
        # contexts feed a previously computed embedding instead.
        extra_dim = (self.action_dim
                     if self.sequence_length == 2 else self.embedding_dim)
        self.embedder = EmbedNet(self.state_dim + extra_dim,
                                 embedding_dim=self.embedding_dim,
                                 hidden_dims=hidden_dims)
        self.next_embedder = EmbedNet(state_dim,
                                      embedding_dim=self.embedding_dim,
                                      hidden_dims=hidden_dims)

        self.trajectory_embedder = RNNEmbedNet(
            [self.sequence_length, self.action_dim + state_dim],
            embedding_dim=self.embedding_dim)

        # Policy consumes state plus latent (Fourier or raw embedding).
        self.primitive_policy = policies.DiagGuassianPolicy(
            state_dim + self.latent_dim, action_spec, hidden_dims=hidden_dims)

        self.optimizer = tf.keras.optimizers.Adam(
            learning_rate=learning_rate if learning_rate else 3e-4)

        self.log_alpha = tf.Variable(tf.math.log(1.0), trainable=True)
        self.target_entropy = -action_spec.shape[0]

        if self.fourier_dim:
            # Fixed random projection and phase for the Fourier features.
            self.omega = tf.Variable(
                tf.random.normal([self.fourier_dim, self.embedding_dim]),
                trainable=False)
            self.shift = tf.Variable(
                tf.random.uniform([self.fourier_dim],
                                  minval=0,
                                  maxval=2 * 3.14159),
                trainable=False)
            # Running first/second moments of the embedding; presumably
            # updated during training — confirm against the caller.
            self.average_embed = tf.Variable(
                tf.zeros([self.embedding_dim]), trainable=False)
            self.average_square = tf.Variable(
                tf.ones([self.embedding_dim]), trainable=False)

        self.pretrain_variables = (self.embedder.variables +
                                   self.next_embedder.variables +
                                   self.primitive_policy.variables +
                                   self.trajectory_embedder.variables +
                                   [self.log_alpha])
Example #5
0
    def __init__(self,
                 state_dim,
                 action_spec,
                 actor_lr=3e-4,
                 critic_lr=3e-4,
                 alpha_lr=3e-4,
                 discount=0.99,
                 tau=0.005,
                 target_update_period=1,
                 target_entropy=0.0,
                 cross_norm=False,
                 pcl_actor_update=False,
                 embed_model=None,
                 other_embed_model=None,
                 network='default',
                 finetune=False):
        """Builds the actor, temperature and critic learner.

    Args:
      state_dim: Dimensionality of environment states.
      action_spec: Spec describing the action space.
      actor_lr: Actor learning rate.
      critic_lr: Critic learning rate.
      alpha_lr: Temperature learning rate.
      discount: MDP discount factor.
      tau: Soft target-update coefficient.
      target_update_period: Steps between target-network updates.
      target_entropy: Entropy target for the temperature loss.
      cross_norm: Whether to use the cross-norm critic learner.
      pcl_actor_update: Whether to use the PCL actor update.
      embed_model: Optional pretrained state embedder.
      other_embed_model: Optional embedder used by the critic instead.
      network: Actor/critic net size: 'none', 'small' or 'default'.
      finetune: Whether the pretrained embedder is finetuned.
    """
        self.action_spec = action_spec
        self.embed_model = embed_model
        # Critic falls back to the actor's embedder when none is given.
        self.other_embed_model = other_embed_model or embed_model
        self.finetune = finetune

        if self.embed_model:
            input_state_dim = self.embed_model.get_input_state_dim()
        else:
            input_state_dim = state_dim

        # Cross-norm training runs Adam without momentum.
        beta_1 = 0.0 if cross_norm else 0.9

        if network == 'none':
            hidden_dims = []
        elif network == 'small':
            hidden_dims = (256,)
        else:
            hidden_dims = (256, 256)

        self.actor = policies.DiagGuassianPolicy(
            input_state_dim, action_spec, hidden_dims=hidden_dims)
        self.actor_optimizer = tf.keras.optimizers.Adam(
            learning_rate=actor_lr, beta_1=beta_1)

        self.log_alpha = tf.Variable(tf.math.log(0.1), trainable=True)
        self.alpha_optimizer = tf.keras.optimizers.Adam(
            learning_rate=alpha_lr, beta_1=beta_1)

        if cross_norm:
            # Cross-norm critic only supports the default network size.
            assert network == 'default'
            self.critic_learner = critic.CrossNormCriticLearner(
                input_state_dim, action_spec.shape[0], critic_lr, discount,
                tau)
        else:
            self.critic_learner = critic.CriticLearner(
                input_state_dim,
                action_spec.shape[0],
                critic_lr,
                discount,
                tau,
                target_update_period,
                hidden_dims=hidden_dims)

        self.target_entropy = target_entropy
        self.discount = discount

        self.pcl_actor_update = pcl_actor_update
    def __init__(self,
                 state_dim,
                 action_spec,
                 actor_lr=3e-5,
                 critic_lr=3e-4,
                 alpha_lr=1e-4,
                 discount=0.99,
                 tau=0.005,
                 target_entropy=0.0,
                 bc_alpha=1.0,
                 embed_model=None,
                 other_embed_model=None,
                 bc_embed_model=None,
                 network='default',
                 finetune=False):
        """Builds actor, temperature, BC regularizer and twin critics.

    Args:
      state_dim: Dimensionality of environment states.
      action_spec: Spec describing the action space.
      actor_lr: Actor learning rate.
      critic_lr: Critic learning rate.
      alpha_lr: Temperature learning rate.
      discount: MDP discount factor.
      tau: Soft target-update coefficient.
      target_entropy: Entropy target for the temperature loss.
      bc_alpha: Weight of the behavioral-cloning policy regularizer.
      embed_model: Optional pretrained state embedder.
      other_embed_model: Optional embedder used by the critic instead.
      bc_embed_model: Optional embedder used by the BC module instead.
      network: Actor/critic net size: 'none', 'small' or 'default'.
      finetune: Whether the pretrained embedder is finetuned.
    """
        self.action_spec = action_spec
        self.embed_model = embed_model
        # Critic falls back to the actor's embedder when none is given.
        self.other_embed_model = other_embed_model or embed_model
        self.finetune = finetune

        if self.embed_model:
            input_state_dim = self.embed_model.get_input_state_dim()
        else:
            input_state_dim = state_dim

        if network == 'none':
            hidden_dims = []
        elif network == 'small':
            hidden_dims = (256,)
        else:
            hidden_dims = (256, 256)

        self.actor = policies.DiagGuassianPolicy(
            input_state_dim, action_spec, hidden_dims=hidden_dims)
        self.actor_optimizer = tf.keras.optimizers.Adam(learning_rate=actor_lr)

        self.log_alpha = tf.Variable(tf.math.log(0.1), trainable=True)
        self.alpha_optimizer = tf.keras.optimizers.Adam(learning_rate=alpha_lr)

        self.target_entropy = target_entropy
        self.discount = discount
        self.tau = tau

        # Mixture-policy behavioral cloning used to regularize the actor.
        self.bc = behavioral_cloning.BehavioralCloning(
            state_dim,
            action_spec,
            mixture=True,
            hidden_dims=hidden_dims,
            embed_model=bc_embed_model or self.embed_model,
            finetune=self.finetune)
        self.bc_alpha = bc_alpha

        # Critic and its target, hard-synced at construction (tau=1.0).
        action_dim = action_spec.shape[0]
        self.critic = critic.Critic(
            input_state_dim, action_dim, hidden_dims=hidden_dims)
        self.critic_target = critic.Critic(
            input_state_dim, action_dim, hidden_dims=hidden_dims)
        critic.soft_update(self.critic, self.critic_target, tau=1.0)
        self.critic_optimizer = tf.keras.optimizers.Adam(
            learning_rate=critic_lr)